From 97f17497d162afdb82c8704bf097f0fee3724b2e Mon Sep 17 00:00:00 2001
From: "C.J. Collier" <cjcollier@linuxfoundation.org>
Date: Tue, 14 Jun 2016 07:50:17 -0700
Subject: Imported Upstream version 16.04

Change-Id: I77eadcd8538a9122e4773cbe55b24033dc451757
Signed-off-by: C.J. Collier <cjcollier@linuxfoundation.org>
---
 lib/Makefile                                       |    66 +
 lib/librte_acl/Makefile                            |    96 +
 lib/librte_acl/acl.h                               |   241 +
 lib/librte_acl/acl_bld.c                           |  1598 +++
 lib/librte_acl/acl_gen.c                           |   561 +
 lib/librte_acl/acl_run.h                           |   263 +
 lib/librte_acl/acl_run_avx2.c                      |    54 +
 lib/librte_acl/acl_run_avx2.h                      |   284 +
 lib/librte_acl/acl_run_neon.c                      |    46 +
 lib/librte_acl/acl_run_neon.h                      |   289 +
 lib/librte_acl/acl_run_scalar.c                    |   192 +
 lib/librte_acl/acl_run_sse.c                       |    47 +
 lib/librte_acl/acl_run_sse.h                       |   357 +
 lib/librte_acl/acl_vect.h                          |   116 +
 lib/librte_acl/rte_acl.c                           |   393 +
 lib/librte_acl/rte_acl.h                           |   388 +
 lib/librte_acl/rte_acl_osdep.h                     |    80 +
 lib/librte_acl/rte_acl_version.map                 |    19 +
 lib/librte_acl/tb_mem.c                            |   108 +
 lib/librte_acl/tb_mem.h                            |    76 +
 lib/librte_cfgfile/Makefile                        |    57 +
 lib/librte_cfgfile/rte_cfgfile.c                   |   374 +
 lib/librte_cfgfile/rte_cfgfile.h                   |   239 +
 lib/librte_cfgfile/rte_cfgfile_version.map         |    22 +
 lib/librte_cmdline/Makefile                        |    67 +
 lib/librte_cmdline/cmdline.c                       |   301 +
 lib/librte_cmdline/cmdline.h                       |   115 +
 lib/librte_cmdline/cmdline_cirbuf.c                |   466 +
 lib/librte_cmdline/cmdline_cirbuf.h                |   245 +
 lib/librte_cmdline/cmdline_parse.c                 |   563 +
 lib/librte_cmdline/cmdline_parse.h                 |   191 +
 lib/librte_cmdline/cmdline_parse_etheraddr.c       |   180 +
 lib/librte_cmdline/cmdline_parse_etheraddr.h       |    96 +
 lib/librte_cmdline/cmdline_parse_ipaddr.c          |   408 +
 lib/librte_cmdline/cmdline_parse_ipaddr.h          |   189 +
 lib/librte_cmdline/cmdline_parse_num.c             |   402 +
 lib/librte_cmdline/cmdline_parse_num.h             |   115 +
 lib/librte_cmdline/cmdline_parse_portlist.c        |   173 +
 lib/librte_cmdline/cmdline_parse_portlist.h        |   103 +
 lib/librte_cmdline/cmdline_parse_string.c          |   253 +
 lib/librte_cmdline/cmdline_parse_string.h          |   112 +
 lib/librte_cmdline/cmdline_rdline.c                |   697 ++
 lib/librte_cmdline/cmdline_rdline.h                |   255 +
 lib/librte_cmdline/cmdline_socket.c                |   119 +
 lib/librte_cmdline/cmdline_socket.h                |    76 +
 lib/librte_cmdline/cmdline_vt100.c                 |   185 +
 lib/librte_cmdline/cmdline_vt100.h                 |   153 +
 lib/librte_cmdline/rte_cmdline_version.map         |    78 +
 lib/librte_compat/Makefile                         |    40 +
 lib/librte_compat/rte_compat.h                     |   105 +
 lib/librte_cryptodev/Makefile                      |    62 +
 lib/librte_cryptodev/rte_crypto.h                  |   415 +
 lib/librte_cryptodev/rte_crypto_sym.h              |   662 ++
 lib/librte_cryptodev/rte_cryptodev.c               |  1162 +++
 lib/librte_cryptodev/rte_cryptodev.h               |   896 ++
 lib/librte_cryptodev/rte_cryptodev_pmd.h           |   543 +
 lib/librte_cryptodev/rte_cryptodev_version.map     |    34 +
 lib/librte_distributor/Makefile                    |    54 +
 lib/librte_distributor/rte_distributor.c           |   487 +
 lib/librte_distributor/rte_distributor.h           |   247 +
 lib/librte_distributor/rte_distributor_version.map |    15 +
 lib/librte_eal/Makefile                            |    38 +
 lib/librte_eal/bsdapp/Makefile                     |    38 +
 lib/librte_eal/bsdapp/contigmem/BSDmakefile        |    36 +
 lib/librte_eal/bsdapp/contigmem/Makefile           |    52 +
 lib/librte_eal/bsdapp/contigmem/contigmem.c        |   232 +
 lib/librte_eal/bsdapp/eal/Makefile                 |   117 +
 lib/librte_eal/bsdapp/eal/eal.c                    |   638 ++
 lib/librte_eal/bsdapp/eal/eal_alarm.c              |    60 +
 lib/librte_eal/bsdapp/eal/eal_debug.c              |   119 +
 lib/librte_eal/bsdapp/eal/eal_hugepage_info.c      |   134 +
 lib/librte_eal/bsdapp/eal/eal_interrupts.c         |   119 +
 lib/librte_eal/bsdapp/eal/eal_lcore.c              |    79 +
 lib/librte_eal/bsdapp/eal/eal_log.c                |    57 +
 lib/librte_eal/bsdapp/eal/eal_memory.c             |   194 +
 lib/librte_eal/bsdapp/eal/eal_pci.c                |   633 ++
 lib/librte_eal/bsdapp/eal/eal_thread.c             |   201 +
 lib/librte_eal/bsdapp/eal/eal_timer.c              |    94 +
 .../bsdapp/eal/include/exec-env/rte_dom0_common.h  |   107 +
 .../bsdapp/eal/include/exec-env/rte_interrupts.h   |   137 +
 lib/librte_eal/bsdapp/eal/rte_eal_version.map      |   153 +
 lib/librte_eal/bsdapp/nic_uio/BSDmakefile          |    36 +
 lib/librte_eal/bsdapp/nic_uio/Makefile             |    52 +
 lib/librte_eal/bsdapp/nic_uio/nic_uio.c            |   335 +
 lib/librte_eal/common/Makefile                     |    61 +
 lib/librte_eal/common/arch/arm/rte_cpuflags.c      |   179 +
 lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c   |   147 +
 lib/librte_eal/common/arch/tile/rte_cpuflags.c     |    47 +
 lib/librte_eal/common/arch/x86/rte_cpuflags.c      |   220 +
 lib/librte_eal/common/eal_common_cpuflags.c        |    76 +
 lib/librte_eal/common/eal_common_dev.c             |   152 +
 lib/librte_eal/common/eal_common_devargs.c         |   176 +
 lib/librte_eal/common/eal_common_errno.c           |    72 +
 lib/librte_eal/common/eal_common_hexdump.c         |   120 +
 lib/librte_eal/common/eal_common_launch.c          |   118 +
 lib/librte_eal/common/eal_common_lcore.c           |   110 +
 lib/librte_eal/common/eal_common_log.c             |   337 +
 lib/librte_eal/common/eal_common_memory.c          |   154 +
 lib/librte_eal/common/eal_common_memzone.c         |   445 +
 lib/librte_eal/common/eal_common_options.c         |  1022 ++
 lib/librte_eal/common/eal_common_pci.c             |   457 +
 lib/librte_eal/common/eal_common_pci_uio.c         |   222 +
 lib/librte_eal/common/eal_common_proc.c            |    61 +
 lib/librte_eal/common/eal_common_string_fns.c      |    69 +
 lib/librte_eal/common/eal_common_tailqs.c          |   202 +
 lib/librte_eal/common/eal_common_thread.c          |   157 +
 lib/librte_eal/common/eal_common_timer.c           |    86 +
 lib/librte_eal/common/eal_filesystem.h             |   118 +
 lib/librte_eal/common/eal_hugepages.h              |    67 +
 lib/librte_eal/common/eal_internal_cfg.h           |    94 +
 lib/librte_eal/common/eal_options.h                |   100 +
 lib/librte_eal/common/eal_private.h                |   331 +
 lib/librte_eal/common/eal_thread.h                 |   100 +
 .../common/include/arch/arm/rte_atomic.h           |    48 +
 .../common/include/arch/arm/rte_atomic_32.h        |    74 +
 .../common/include/arch/arm/rte_atomic_64.h        |    88 +
 .../common/include/arch/arm/rte_byteorder.h        |   107 +
 .../common/include/arch/arm/rte_cpuflags.h         |    42 +
 .../common/include/arch/arm/rte_cpuflags_32.h      |    82 +
 .../common/include/arch/arm/rte_cpuflags_64.h      |    64 +
 .../common/include/arch/arm/rte_cycles.h           |    42 +
 .../common/include/arch/arm/rte_cycles_32.h        |   121 +
 .../common/include/arch/arm/rte_cycles_64.h        |    71 +
 .../common/include/arch/arm/rte_memcpy.h           |    42 +
 .../common/include/arch/arm/rte_memcpy_32.h        |   338 +
 .../common/include/arch/arm/rte_memcpy_64.h        |    93 +
 .../common/include/arch/arm/rte_prefetch.h         |    42 +
 .../common/include/arch/arm/rte_prefetch_32.h      |    67 +
 .../common/include/arch/arm/rte_prefetch_64.h      |    66 +
 .../common/include/arch/arm/rte_rwlock.h           |    40 +
 .../common/include/arch/arm/rte_spinlock.h         |    92 +
 lib/librte_eal/common/include/arch/arm/rte_vect.h  |    83 +
 .../common/include/arch/ppc_64/rte_atomic.h        |   432 +
 .../common/include/arch/ppc_64/rte_byteorder.h     |   149 +
 .../common/include/arch/ppc_64/rte_cpuflags.h      |    88 +
 .../common/include/arch/ppc_64/rte_cycles.h        |    94 +
 .../common/include/arch/ppc_64/rte_memcpy.h        |   225 +
 .../common/include/arch/ppc_64/rte_prefetch.h      |    67 +
 .../common/include/arch/ppc_64/rte_rwlock.h        |    38 +
 .../common/include/arch/ppc_64/rte_spinlock.h      |   114 +
 .../common/include/arch/tile/rte_atomic.h          |    92 +
 .../common/include/arch/tile/rte_byteorder.h       |    91 +
 .../common/include/arch/tile/rte_cpuflags.h        |    53 +
 .../common/include/arch/tile/rte_cycles.h          |    70 +
 .../common/include/arch/tile/rte_memcpy.h          |    93 +
 .../common/include/arch/tile/rte_prefetch.h        |    67 +
 .../common/include/arch/tile/rte_rwlock.h          |    70 +
 .../common/include/arch/tile/rte_spinlock.h        |    92 +
 .../common/include/arch/x86/rte_atomic.h           |   222 +
 .../common/include/arch/x86/rte_atomic_32.h        |   222 +
 .../common/include/arch/x86/rte_atomic_64.h        |   191 +
 .../common/include/arch/x86/rte_byteorder.h        |   125 +
 .../common/include/arch/x86/rte_byteorder_32.h     |    51 +
 .../common/include/arch/x86/rte_byteorder_64.h     |    52 +
 .../common/include/arch/x86/rte_cpuflags.h         |   153 +
 .../common/include/arch/x86/rte_cycles.h           |   121 +
 .../common/include/arch/x86/rte_memcpy.h           |   884 ++
 .../common/include/arch/x86/rte_prefetch.h         |    67 +
 lib/librte_eal/common/include/arch/x86/rte_rtm.h   |    73 +
 .../common/include/arch/x86/rte_rwlock.h           |    82 +
 .../common/include/arch/x86/rte_spinlock.h         |   201 +
 lib/librte_eal/common/include/arch/x86/rte_vect.h  |   132 +
 lib/librte_eal/common/include/generic/rte_atomic.h |   945 ++
 .../common/include/generic/rte_byteorder.h         |   217 +
 .../common/include/generic/rte_cpuflags.h          |    82 +
 lib/librte_eal/common/include/generic/rte_cycles.h |   205 +
 lib/librte_eal/common/include/generic/rte_memcpy.h |   144 +
 .../common/include/generic/rte_prefetch.h          |    83 +
 lib/librte_eal/common/include/generic/rte_rwlock.h |   208 +
 .../common/include/generic/rte_spinlock.h          |   325 +
 lib/librte_eal/common/include/rte_alarm.h          |   106 +
 .../common/include/rte_branch_prediction.h         |    70 +
 lib/librte_eal/common/include/rte_common.h         |   401 +
 lib/librte_eal/common/include/rte_debug.h          |   103 +
 lib/librte_eal/common/include/rte_dev.h            |   192 +
 lib/librte_eal/common/include/rte_devargs.h        |   177 +
 lib/librte_eal/common/include/rte_eal.h            |   259 +
 lib/librte_eal/common/include/rte_eal_memconfig.h  |   100 +
 lib/librte_eal/common/include/rte_errno.h          |    95 +
 lib/librte_eal/common/include/rte_hexdump.h        |    89 +
 lib/librte_eal/common/include/rte_interrupts.h     |   120 +
 lib/librte_eal/common/include/rte_keepalive.h      |   108 +
 lib/librte_eal/common/include/rte_launch.h         |   177 +
 lib/librte_eal/common/include/rte_lcore.h          |   276 +
 lib/librte_eal/common/include/rte_log.h            |   311 +
 lib/librte_eal/common/include/rte_malloc.h         |   342 +
 lib/librte_eal/common/include/rte_malloc_heap.h    |    55 +
 lib/librte_eal/common/include/rte_memory.h         |   263 +
 lib/librte_eal/common/include/rte_memzone.h        |   305 +
 lib/librte_eal/common/include/rte_pci.h            |   598 ++
 .../common/include/rte_pci_dev_feature_defs.h      |    70 +
 .../common/include/rte_pci_dev_features.h          |    69 +
 lib/librte_eal/common/include/rte_pci_dev_ids.h    |   704 ++
 lib/librte_eal/common/include/rte_per_lcore.h      |    79 +
 lib/librte_eal/common/include/rte_random.h         |    91 +
 lib/librte_eal/common/include/rte_string_fns.h     |    81 +
 lib/librte_eal/common/include/rte_tailq.h          |   162 +
 lib/librte_eal/common/include/rte_time.h           |   122 +
 lib/librte_eal/common/include/rte_version.h        |   130 +
 lib/librte_eal/common/include/rte_warnings.h       |    84 +
 lib/librte_eal/common/malloc_elem.c                |   344 +
 lib/librte_eal/common/malloc_elem.h                |   192 +
 lib/librte_eal/common/malloc_heap.c                |   236 +
 lib/librte_eal/common/malloc_heap.h                |    70 +
 lib/librte_eal/common/rte_keepalive.c              |   149 +
 lib/librte_eal/common/rte_malloc.c                 |   262 +
 lib/librte_eal/linuxapp/Makefile                   |    39 +
 lib/librte_eal/linuxapp/eal/Makefile               |   139 +
 lib/librte_eal/linuxapp/eal/eal.c                  |   936 ++
 lib/librte_eal/linuxapp/eal/eal_alarm.c            |   273 +
 lib/librte_eal/linuxapp/eal/eal_debug.c            |   119 +
 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c    |   365 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       |  1224 +++
 lib/librte_eal/linuxapp/eal/eal_ivshmem.c          |   955 ++
 lib/librte_eal/linuxapp/eal/eal_lcore.c            |   110 +
 lib/librte_eal/linuxapp/eal/eal_log.c              |   146 +
 lib/librte_eal/linuxapp/eal/eal_memory.c           |  1559 +++
 lib/librte_eal/linuxapp/eal/eal_pci.c              |   762 ++
 lib/librte_eal/linuxapp/eal/eal_pci_init.h         |   127 +
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c          |   491 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         |  1097 ++
 lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c |   405 +
 lib/librte_eal/linuxapp/eal/eal_thread.c           |   199 +
 lib/librte_eal/linuxapp/eal/eal_timer.c            |   305 +
 lib/librte_eal/linuxapp/eal/eal_vfio.h             |    67 +
 lib/librte_eal/linuxapp/eal/eal_xen_memory.c       |   369 +
 .../eal/include/exec-env/rte_dom0_common.h         |   108 +
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |   228 +
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |   172 +
 lib/librte_eal/linuxapp/eal/rte_eal_version.map    |   156 +
 lib/librte_eal/linuxapp/igb_uio/Makefile           |    53 +
 lib/librte_eal/linuxapp/igb_uio/compat.h           |   116 +
 lib/librte_eal/linuxapp/igb_uio/igb_uio.c          |   580 ++
 lib/librte_eal/linuxapp/kni/Makefile               |    93 +
 lib/librte_eal/linuxapp/kni/compat.h               |    29 +
 lib/librte_eal/linuxapp/kni/ethtool/README         |   100 +
 lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING    |   339 +
 .../linuxapp/kni/ethtool/igb/e1000_82575.c         |  3665 +++++++
 .../linuxapp/kni/ethtool/igb/e1000_82575.h         |   509 +
 .../linuxapp/kni/ethtool/igb/e1000_api.c           |  1159 +++
 .../linuxapp/kni/ethtool/igb/e1000_api.h           |   157 +
 .../linuxapp/kni/ethtool/igb/e1000_defines.h       |  1380 +++
 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h |   793 ++
 .../linuxapp/kni/ethtool/igb/e1000_i210.c          |   909 ++
 .../linuxapp/kni/ethtool/igb/e1000_i210.h          |    91 +
 .../linuxapp/kni/ethtool/igb/e1000_mac.c           |  2096 ++++
 .../linuxapp/kni/ethtool/igb/e1000_mac.h           |    80 +
 .../linuxapp/kni/ethtool/igb/e1000_manage.c        |   554 +
 .../linuxapp/kni/ethtool/igb/e1000_manage.h        |    89 +
 .../linuxapp/kni/ethtool/igb/e1000_mbx.c           |   525 +
 .../linuxapp/kni/ethtool/igb/e1000_mbx.h           |    87 +
 .../linuxapp/kni/ethtool/igb/e1000_nvm.c           |   965 ++
 .../linuxapp/kni/ethtool/igb/e1000_nvm.h           |    75 +
 .../linuxapp/kni/ethtool/igb/e1000_osdep.h         |   136 +
 .../linuxapp/kni/ethtool/igb/e1000_phy.c           |  3405 ++++++
 .../linuxapp/kni/ethtool/igb/e1000_phy.h           |   256 +
 .../linuxapp/kni/ethtool/igb/e1000_regs.h          |   646 ++
 lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h      |   859 ++
 .../linuxapp/kni/ethtool/igb/igb_debugfs.c         |    28 +
 .../linuxapp/kni/ethtool/igb/igb_ethtool.c         |  2857 +++++
 .../linuxapp/kni/ethtool/igb/igb_hwmon.c           |   260 +
 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 10291 +++++++++++++++++++
 .../linuxapp/kni/ethtool/igb/igb_param.c           |   847 ++
 .../linuxapp/kni/ethtool/igb/igb_procfs.c          |   363 +
 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c  |   944 ++
 .../linuxapp/kni/ethtool/igb/igb_regtest.h         |   249 +
 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c |   436 +
 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h |    46 +
 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c  |  1482 +++
 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h  |  3918 +++++++
 .../linuxapp/kni/ethtool/igb/kcompat_ethtool.c     |  1171 +++
 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING  |   339 +
 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h  |   925 ++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c       |  1296 +++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h       |    44 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c       |  2313 +++++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h       |    58 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.c         |  1157 +++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.h         |   168 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.c      |  4082 ++++++++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.h      |   140 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h         |   168 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c     |  2901 ++++++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h        |    91 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_main.c        |  2970 ++++++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h         |   105 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h       |   132 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c         |  1847 ++++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h         |   137 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h       |    73 +
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_type.h        |  3254 ++++++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c        |   937 ++
 .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h        |    58 +
 .../linuxapp/kni/ethtool/ixgbe/kcompat.c           |  1246 +++
 .../linuxapp/kni/ethtool/ixgbe/kcompat.h           |  3143 ++++++
 lib/librte_eal/linuxapp/kni/kni_dev.h              |   149 +
 lib/librte_eal/linuxapp/kni/kni_ethtool.c          |   217 +
 lib/librte_eal/linuxapp/kni/kni_fifo.h             |   108 +
 lib/librte_eal/linuxapp/kni/kni_misc.c             |   688 ++
 lib/librte_eal/linuxapp/kni/kni_net.c              |   706 ++
 lib/librte_eal/linuxapp/kni/kni_vhost.c            |   857 ++
 lib/librte_eal/linuxapp/xen_dom0/Makefile          |    56 +
 lib/librte_eal/linuxapp/xen_dom0/compat.h          |    15 +
 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h     |   107 +
 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c    |   780 ++
 lib/librte_ether/Makefile                          |    59 +
 lib/librte_ether/rte_dev_info.h                    |    57 +
 lib/librte_ether/rte_eth_ctrl.h                    |   846 ++
 lib/librte_ether/rte_ethdev.c                      |  3371 ++++++
 lib/librte_ether/rte_ethdev.h                      |  4286 ++++++++
 lib/librte_ether/rte_ether.h                       |   416 +
 lib/librte_ether/rte_ether_version.map             |   134 +
 lib/librte_hash/Makefile                           |    61 +
 lib/librte_hash/rte_cmp_arm64.h                    |   114 +
 lib/librte_hash/rte_cmp_x86.h                      |   109 +
 lib/librte_hash/rte_crc_arm64.h                    |   215 +
 lib/librte_hash/rte_cuckoo_hash.c                  |  1323 +++
 lib/librte_hash/rte_fbk_hash.c                     |   231 +
 lib/librte_hash/rte_fbk_hash.h                     |   396 +
 lib/librte_hash/rte_hash.h                         |   436 +
 lib/librte_hash/rte_hash_crc.h                     |   639 ++
 lib/librte_hash/rte_hash_version.map               |    40 +
 lib/librte_hash/rte_jhash.h                        |   410 +
 lib/librte_hash/rte_thash.h                        |   250 +
 lib/librte_ip_frag/Makefile                        |    59 +
 lib/librte_ip_frag/ip_frag_common.h                |   169 +
 lib/librte_ip_frag/ip_frag_internal.c              |   418 +
 lib/librte_ip_frag/rte_ip_frag.h                   |   363 +
 lib/librte_ip_frag/rte_ip_frag_common.c            |   139 +
 lib/librte_ip_frag/rte_ipfrag_version.map          |    13 +
 lib/librte_ip_frag/rte_ipv4_fragmentation.c        |   209 +
 lib/librte_ip_frag/rte_ipv4_reassembly.c           |   184 +
 lib/librte_ip_frag/rte_ipv6_fragmentation.c        |   207 +
 lib/librte_ip_frag/rte_ipv6_reassembly.c           |   225 +
 lib/librte_ivshmem/Makefile                        |    52 +
 lib/librte_ivshmem/rte_ivshmem.c                   |   905 ++
 lib/librte_ivshmem/rte_ivshmem.h                   |   165 +
 lib/librte_ivshmem/rte_ivshmem_version.map         |    12 +
 lib/librte_jobstats/Makefile                       |    53 +
 lib/librte_jobstats/rte_jobstats.c                 |   293 +
 lib/librte_jobstats/rte_jobstats.h                 |   336 +
 lib/librte_jobstats/rte_jobstats_version.map       |    26 +
 lib/librte_kni/Makefile                            |    53 +
 lib/librte_kni/rte_kni.c                           |   712 ++
 lib/librte_kni/rte_kni.h                           |   256 +
 lib/librte_kni/rte_kni_fifo.h                      |    93 +
 lib/librte_kni/rte_kni_version.map                 |    17 +
 lib/librte_kvargs/Makefile                         |    55 +
 lib/librte_kvargs/rte_kvargs.c                     |   210 +
 lib/librte_kvargs/rte_kvargs.h                     |   156 +
 lib/librte_kvargs/rte_kvargs_version.map           |    10 +
 lib/librte_lpm/Makefile                            |    59 +
 lib/librte_lpm/rte_lpm.c                           |  1919 ++++
 lib/librte_lpm/rte_lpm.h                           |   491 +
 lib/librte_lpm/rte_lpm6.c                          |   883 ++
 lib/librte_lpm/rte_lpm6.h                          |   227 +
 lib/librte_lpm/rte_lpm_neon.h                      |   153 +
 lib/librte_lpm/rte_lpm_sse.h                       |   149 +
 lib/librte_lpm/rte_lpm_version.map                 |    36 +
 lib/librte_mbuf/Makefile                           |    52 +
 lib/librte_mbuf/rte_mbuf.c                         |   288 +
 lib/librte_mbuf/rte_mbuf.h                         |  1946 ++++
 lib/librte_mbuf/rte_mbuf_version.map               |    20 +
 lib/librte_mempool/Makefile                        |    53 +
 lib/librte_mempool/rte_dom0_mempool.c              |   133 +
 lib/librte_mempool/rte_mempool.c                   |   919 ++
 lib/librte_mempool/rte_mempool.h                   |  1408 +++
 lib/librte_mempool/rte_mempool_version.map         |    19 +
 lib/librte_meter/Makefile                          |    59 +
 lib/librte_meter/rte_meter.c                       |   120 +
 lib/librte_meter/rte_meter.h                       |   387 +
 lib/librte_meter/rte_meter_version.map             |    12 +
 lib/librte_net/Makefile                            |    40 +
 lib/librte_net/rte_arp.h                           |    83 +
 lib/librte_net/rte_icmp.h                          |   101 +
 lib/librte_net/rte_ip.h                            |   413 +
 lib/librte_net/rte_sctp.h                          |    99 +
 lib/librte_net/rte_tcp.h                           |   104 +
 lib/librte_net/rte_udp.h                           |    99 +
 lib/librte_pipeline/Makefile                       |    58 +
 lib/librte_pipeline/rte_pipeline.c                 |  1649 +++
 lib/librte_pipeline/rte_pipeline.h                 |   875 ++
 lib/librte_pipeline/rte_pipeline_version.map       |    47 +
 lib/librte_port/Makefile                           |    79 +
 lib/librte_port/rte_port.h                         |   263 +
 lib/librte_port/rte_port_ethdev.c                  |   553 +
 lib/librte_port/rte_port_ethdev.h                  |   105 +
 lib/librte_port/rte_port_frag.c                    |   305 +
 lib/librte_port/rte_port_frag.h                    |   101 +
 lib/librte_port/rte_port_ras.c                     |   359 +
 lib/librte_port/rte_port_ras.h                     |    90 +
 lib/librte_port/rte_port_ring.c                    |   810 ++
 lib/librte_port/rte_port_ring.h                    |   123 +
 lib/librte_port/rte_port_sched.c                   |   323 +
 lib/librte_port/rte_port_sched.h                   |    82 +
 lib/librte_port/rte_port_source_sink.c             |   667 ++
 lib/librte_port/rte_port_source_sink.h             |    90 +
 lib/librte_port/rte_port_version.map               |    37 +
 lib/librte_power/Makefile                          |    53 +
 lib/librte_power/channel_commands.h                |    74 +
 lib/librte_power/guest_channel.c                   |   161 +
 lib/librte_power/guest_channel.h                   |    89 +
 lib/librte_power/rte_power.c                       |   143 +
 lib/librte_power/rte_power.h                       |   243 +
 lib/librte_power/rte_power_acpi_cpufreq.c          |   545 +
 lib/librte_power/rte_power_acpi_cpufreq.h          |   192 +
 lib/librte_power/rte_power_common.h                |    39 +
 lib/librte_power/rte_power_kvm_vm.c                |   135 +
 lib/librte_power/rte_power_kvm_vm.h                |   179 +
 lib/librte_power/rte_power_version.map             |    18 +
 lib/librte_reorder/Makefile                        |    54 +
 lib/librte_reorder/rte_reorder.c                   |   411 +
 lib/librte_reorder/rte_reorder.h                   |   180 +
 lib/librte_reorder/rte_reorder_version.map         |    13 +
 lib/librte_ring/Makefile                           |    51 +
 lib/librte_ring/rte_ring.c                         |   373 +
 lib/librte_ring/rte_ring.h                         |  1261 +++
 lib/librte_ring/rte_ring_version.map               |    20 +
 lib/librte_sched/Makefile                          |    65 +
 lib/librte_sched/rte_approx.c                      |   196 +
 lib/librte_sched/rte_approx.h                      |    75 +
 lib/librte_sched/rte_bitmap.h                      |   560 +
 lib/librte_sched/rte_reciprocal.c                  |    72 +
 lib/librte_sched/rte_reciprocal.h                  |    39 +
 lib/librte_sched/rte_red.c                         |   158 +
 lib/librte_sched/rte_red.h                         |   453 +
 lib/librte_sched/rte_sched.c                       |  2156 ++++
 lib/librte_sched/rte_sched.h                       |   455 +
 lib/librte_sched/rte_sched_common.h                |   129 +
 lib/librte_sched/rte_sched_version.map             |    31 +
 lib/librte_table/Makefile                          |    85 +
 lib/librte_table/rte_lru.h                         |   213 +
 lib/librte_table/rte_table.h                       |   301 +
 lib/librte_table/rte_table_acl.c                   |   835 ++
 lib/librte_table/rte_table_acl.h                   |    95 +
 lib/librte_table/rte_table_array.c                 |   237 +
 lib/librte_table/rte_table_array.h                 |    76 +
 lib/librte_table/rte_table_hash.h                  |   370 +
 lib/librte_table/rte_table_hash_ext.c              |  1159 +++
 lib/librte_table/rte_table_hash_key16.c            |  1515 +++
 lib/librte_table/rte_table_hash_key32.c            |  1144 +++
 lib/librte_table/rte_table_hash_key8.c             |  1471 +++
 lib/librte_table/rte_table_hash_lru.c              |  1102 ++
 lib/librte_table/rte_table_lpm.c                   |   393 +
 lib/librte_table/rte_table_lpm.h                   |   124 +
 lib/librte_table/rte_table_lpm_ipv6.c              |   398 +
 lib/librte_table/rte_table_lpm_ipv6.h              |   122 +
 lib/librte_table/rte_table_stub.c                  |   121 +
 lib/librte_table/rte_table_stub.h                  |    62 +
 lib/librte_table/rte_table_version.map             |    28 +
 lib/librte_timer/Makefile                          |    52 +
 lib/librte_timer/rte_timer.c                       |   637 ++
 lib/librte_timer/rte_timer.h                       |   335 +
 lib/librte_timer/rte_timer_version.map             |    15 +
 lib/librte_vhost/Makefile                          |    71 +
 lib/librte_vhost/eventfd_link/Makefile             |    41 +
 lib/librte_vhost/eventfd_link/eventfd_link.c       |   277 +
 lib/librte_vhost/eventfd_link/eventfd_link.h       |    94 +
 lib/librte_vhost/libvirt/qemu-wrap.py              |   387 +
 lib/librte_vhost/rte_vhost_version.map             |    22 +
 lib/librte_vhost/rte_virtio_net.h                  |   286 +
 lib/librte_vhost/vhost-net.h                       |   114 +
 lib/librte_vhost/vhost_cuse/eventfd_copy.c         |   104 +
 lib/librte_vhost/vhost_cuse/eventfd_copy.h         |    45 +
 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c       |   426 +
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c      |   435 +
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h      |    48 +
 lib/librte_vhost/vhost_rxtx.c                      |   947 ++
 lib/librte_vhost/vhost_user/fd_man.c               |   289 +
 lib/librte_vhost/vhost_user/fd_man.h               |    67 +
 lib/librte_vhost/vhost_user/vhost-net-user.c       |   531 +
 lib/librte_vhost/vhost_user/vhost-net-user.h       |   117 +
 lib/librte_vhost/vhost_user/virtio-net-user.c      |   446 +
 lib/librte_vhost/vhost_user/virtio-net-user.h      |    64 +
 lib/librte_vhost/virtio-net.c                      |   772 ++
 lib/librte_vhost/virtio-net.h                      |    43 +
 476 files changed, 185671 insertions(+)
 create mode 100644 lib/Makefile
 create mode 100644 lib/librte_acl/Makefile
 create mode 100644 lib/librte_acl/acl.h
 create mode 100644 lib/librte_acl/acl_bld.c
 create mode 100644 lib/librte_acl/acl_gen.c
 create mode 100644 lib/librte_acl/acl_run.h
 create mode 100644 lib/librte_acl/acl_run_avx2.c
 create mode 100644 lib/librte_acl/acl_run_avx2.h
 create mode 100644 lib/librte_acl/acl_run_neon.c
 create mode 100644 lib/librte_acl/acl_run_neon.h
 create mode 100644 lib/librte_acl/acl_run_scalar.c
 create mode 100644 lib/librte_acl/acl_run_sse.c
 create mode 100644 lib/librte_acl/acl_run_sse.h
 create mode 100644 lib/librte_acl/acl_vect.h
 create mode 100644 lib/librte_acl/rte_acl.c
 create mode 100644 lib/librte_acl/rte_acl.h
 create mode 100644 lib/librte_acl/rte_acl_osdep.h
 create mode 100644 lib/librte_acl/rte_acl_version.map
 create mode 100644 lib/librte_acl/tb_mem.c
 create mode 100644 lib/librte_acl/tb_mem.h
 create mode 100644 lib/librte_cfgfile/Makefile
 create mode 100644 lib/librte_cfgfile/rte_cfgfile.c
 create mode 100644 lib/librte_cfgfile/rte_cfgfile.h
 create mode 100644 lib/librte_cfgfile/rte_cfgfile_version.map
 create mode 100644 lib/librte_cmdline/Makefile
 create mode 100644 lib/librte_cmdline/cmdline.c
 create mode 100644 lib/librte_cmdline/cmdline.h
 create mode 100644 lib/librte_cmdline/cmdline_cirbuf.c
 create mode 100644 lib/librte_cmdline/cmdline_cirbuf.h
 create mode 100644 lib/librte_cmdline/cmdline_parse.c
 create mode 100644 lib/librte_cmdline/cmdline_parse.h
 create mode 100644 lib/librte_cmdline/cmdline_parse_etheraddr.c
 create mode 100644 lib/librte_cmdline/cmdline_parse_etheraddr.h
 create mode 100644 lib/librte_cmdline/cmdline_parse_ipaddr.c
 create mode 100644 lib/librte_cmdline/cmdline_parse_ipaddr.h
 create mode 100644 lib/librte_cmdline/cmdline_parse_num.c
 create mode 100644 lib/librte_cmdline/cmdline_parse_num.h
 create mode 100644 lib/librte_cmdline/cmdline_parse_portlist.c
 create mode 100644 lib/librte_cmdline/cmdline_parse_portlist.h
 create mode 100644 lib/librte_cmdline/cmdline_parse_string.c
 create mode 100644 lib/librte_cmdline/cmdline_parse_string.h
 create mode 100644 lib/librte_cmdline/cmdline_rdline.c
 create mode 100644 lib/librte_cmdline/cmdline_rdline.h
 create mode 100644 lib/librte_cmdline/cmdline_socket.c
 create mode 100644 lib/librte_cmdline/cmdline_socket.h
 create mode 100644 lib/librte_cmdline/cmdline_vt100.c
 create mode 100644 lib/librte_cmdline/cmdline_vt100.h
 create mode 100644 lib/librte_cmdline/rte_cmdline_version.map
 create mode 100644 lib/librte_compat/Makefile
 create mode 100644 lib/librte_compat/rte_compat.h
 create mode 100644 lib/librte_cryptodev/Makefile
 create mode 100644 lib/librte_cryptodev/rte_crypto.h
 create mode 100644 lib/librte_cryptodev/rte_crypto_sym.h
 create mode 100644 lib/librte_cryptodev/rte_cryptodev.c
 create mode 100644 lib/librte_cryptodev/rte_cryptodev.h
 create mode 100644 lib/librte_cryptodev/rte_cryptodev_pmd.h
 create mode 100644 lib/librte_cryptodev/rte_cryptodev_version.map
 create mode 100644 lib/librte_distributor/Makefile
 create mode 100644 lib/librte_distributor/rte_distributor.c
 create mode 100644 lib/librte_distributor/rte_distributor.h
 create mode 100644 lib/librte_distributor/rte_distributor_version.map
 create mode 100644 lib/librte_eal/Makefile
 create mode 100644 lib/librte_eal/bsdapp/Makefile
 create mode 100644 lib/librte_eal/bsdapp/contigmem/BSDmakefile
 create mode 100644 lib/librte_eal/bsdapp/contigmem/Makefile
 create mode 100644 lib/librte_eal/bsdapp/contigmem/contigmem.c
 create mode 100644 lib/librte_eal/bsdapp/eal/Makefile
 create mode 100644 lib/librte_eal/bsdapp/eal/eal.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_alarm.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_debug.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_interrupts.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_lcore.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_log.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_memory.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_pci.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_thread.c
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_timer.c
 create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
 create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
 create mode 100644 lib/librte_eal/bsdapp/eal/rte_eal_version.map
 create mode 100644 lib/librte_eal/bsdapp/nic_uio/BSDmakefile
 create mode 100644 lib/librte_eal/bsdapp/nic_uio/Makefile
 create mode 100644 lib/librte_eal/bsdapp/nic_uio/nic_uio.c
 create mode 100644 lib/librte_eal/common/Makefile
 create mode 100644 lib/librte_eal/common/arch/arm/rte_cpuflags.c
 create mode 100644 lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
 create mode 100644 lib/librte_eal/common/arch/tile/rte_cpuflags.c
 create mode 100644 lib/librte_eal/common/arch/x86/rte_cpuflags.c
 create mode 100644 lib/librte_eal/common/eal_common_cpuflags.c
 create mode 100644 lib/librte_eal/common/eal_common_dev.c
 create mode 100644 lib/librte_eal/common/eal_common_devargs.c
 create mode 100644 lib/librte_eal/common/eal_common_errno.c
 create mode 100644 lib/librte_eal/common/eal_common_hexdump.c
 create mode 100644 lib/librte_eal/common/eal_common_launch.c
 create mode 100644 lib/librte_eal/common/eal_common_lcore.c
 create mode 100644 lib/librte_eal/common/eal_common_log.c
 create mode 100644 lib/librte_eal/common/eal_common_memory.c
 create mode 100644 lib/librte_eal/common/eal_common_memzone.c
 create mode 100644 lib/librte_eal/common/eal_common_options.c
 create mode 100644 lib/librte_eal/common/eal_common_pci.c
 create mode 100644 lib/librte_eal/common/eal_common_pci_uio.c
 create mode 100644 lib/librte_eal/common/eal_common_proc.c
 create mode 100644 lib/librte_eal/common/eal_common_string_fns.c
 create mode 100644 lib/librte_eal/common/eal_common_tailqs.c
 create mode 100644 lib/librte_eal/common/eal_common_thread.c
 create mode 100644 lib/librte_eal/common/eal_common_timer.c
 create mode 100644 lib/librte_eal/common/eal_filesystem.h
 create mode 100644 lib/librte_eal/common/eal_hugepages.h
 create mode 100644 lib/librte_eal/common/eal_internal_cfg.h
 create mode 100644 lib/librte_eal/common/eal_options.h
 create mode 100644 lib/librte_eal/common/eal_private.h
 create mode 100644 lib/librte_eal/common/eal_thread.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_rwlock.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_spinlock.h
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_vect.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_rwlock.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_spinlock.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rtm.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rwlock.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_spinlock.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_vect.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_atomic.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_byteorder.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_cpuflags.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_cycles.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_memcpy.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_prefetch.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_rwlock.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_spinlock.h
 create mode 100644 lib/librte_eal/common/include/rte_alarm.h
 create mode 100644 lib/librte_eal/common/include/rte_branch_prediction.h
 create mode 100644 lib/librte_eal/common/include/rte_common.h
 create mode 100644 lib/librte_eal/common/include/rte_debug.h
 create mode 100644 lib/librte_eal/common/include/rte_dev.h
 create mode 100644 lib/librte_eal/common/include/rte_devargs.h
 create mode 100644 lib/librte_eal/common/include/rte_eal.h
 create mode 100644 lib/librte_eal/common/include/rte_eal_memconfig.h
 create mode 100644 lib/librte_eal/common/include/rte_errno.h
 create mode 100644 lib/librte_eal/common/include/rte_hexdump.h
 create mode 100644 lib/librte_eal/common/include/rte_interrupts.h
 create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
 create mode 100644 lib/librte_eal/common/include/rte_launch.h
 create mode 100644 lib/librte_eal/common/include/rte_lcore.h
 create mode 100644 lib/librte_eal/common/include/rte_log.h
 create mode 100644 lib/librte_eal/common/include/rte_malloc.h
 create mode 100644 lib/librte_eal/common/include/rte_malloc_heap.h
 create mode 100644 lib/librte_eal/common/include/rte_memory.h
 create mode 100644 lib/librte_eal/common/include/rte_memzone.h
 create mode 100644 lib/librte_eal/common/include/rte_pci.h
 create mode 100644 lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
 create mode 100644 lib/librte_eal/common/include/rte_pci_dev_features.h
 create mode 100644 lib/librte_eal/common/include/rte_pci_dev_ids.h
 create mode 100644 lib/librte_eal/common/include/rte_per_lcore.h
 create mode 100644 lib/librte_eal/common/include/rte_random.h
 create mode 100644 lib/librte_eal/common/include/rte_string_fns.h
 create mode 100644 lib/librte_eal/common/include/rte_tailq.h
 create mode 100644 lib/librte_eal/common/include/rte_time.h
 create mode 100644 lib/librte_eal/common/include/rte_version.h
 create mode 100644 lib/librte_eal/common/include/rte_warnings.h
 create mode 100644 lib/librte_eal/common/malloc_elem.c
 create mode 100644 lib/librte_eal/common/malloc_elem.h
 create mode 100644 lib/librte_eal/common/malloc_heap.c
 create mode 100644 lib/librte_eal/common/malloc_heap.h
 create mode 100644 lib/librte_eal/common/rte_keepalive.c
 create mode 100644 lib/librte_eal/common/rte_malloc.c
 create mode 100644 lib/librte_eal/linuxapp/Makefile
 create mode 100644 lib/librte_eal/linuxapp/eal/Makefile
 create mode 100644 lib/librte_eal/linuxapp/eal/eal.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_alarm.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_debug.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_interrupts.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_ivshmem.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_lcore.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_log.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_memory.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_init.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_uio.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_thread.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_timer.c
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_vfio.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_xen_memory.c
 create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
 create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
 create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
 create mode 100644 lib/librte_eal/linuxapp/eal/rte_eal_version.map
 create mode 100644 lib/librte_eal/linuxapp/igb_uio/Makefile
 create mode 100644 lib/librte_eal/linuxapp/igb_uio/compat.h
 create mode 100644 lib/librte_eal/linuxapp/igb_uio/igb_uio.c
 create mode 100644 lib/librte_eal/linuxapp/kni/Makefile
 create mode 100644 lib/librte_eal/linuxapp/kni/compat.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/README
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
 create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
 create mode 100644 lib/librte_eal/linuxapp/kni/kni_dev.h
 create mode 100644 lib/librte_eal/linuxapp/kni/kni_ethtool.c
 create mode 100644 lib/librte_eal/linuxapp/kni/kni_fifo.h
 create mode 100644 lib/librte_eal/linuxapp/kni/kni_misc.c
 create mode 100644 lib/librte_eal/linuxapp/kni/kni_net.c
 create mode 100644 lib/librte_eal/linuxapp/kni/kni_vhost.c
 create mode 100644 lib/librte_eal/linuxapp/xen_dom0/Makefile
 create mode 100644 lib/librte_eal/linuxapp/xen_dom0/compat.h
 create mode 100644 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
 create mode 100644 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
 create mode 100644 lib/librte_ether/Makefile
 create mode 100644 lib/librte_ether/rte_dev_info.h
 create mode 100644 lib/librte_ether/rte_eth_ctrl.h
 create mode 100644 lib/librte_ether/rte_ethdev.c
 create mode 100644 lib/librte_ether/rte_ethdev.h
 create mode 100644 lib/librte_ether/rte_ether.h
 create mode 100644 lib/librte_ether/rte_ether_version.map
 create mode 100644 lib/librte_hash/Makefile
 create mode 100644 lib/librte_hash/rte_cmp_arm64.h
 create mode 100644 lib/librte_hash/rte_cmp_x86.h
 create mode 100644 lib/librte_hash/rte_crc_arm64.h
 create mode 100644 lib/librte_hash/rte_cuckoo_hash.c
 create mode 100644 lib/librte_hash/rte_fbk_hash.c
 create mode 100644 lib/librte_hash/rte_fbk_hash.h
 create mode 100644 lib/librte_hash/rte_hash.h
 create mode 100644 lib/librte_hash/rte_hash_crc.h
 create mode 100644 lib/librte_hash/rte_hash_version.map
 create mode 100644 lib/librte_hash/rte_jhash.h
 create mode 100644 lib/librte_hash/rte_thash.h
 create mode 100644 lib/librte_ip_frag/Makefile
 create mode 100644 lib/librte_ip_frag/ip_frag_common.h
 create mode 100644 lib/librte_ip_frag/ip_frag_internal.c
 create mode 100644 lib/librte_ip_frag/rte_ip_frag.h
 create mode 100644 lib/librte_ip_frag/rte_ip_frag_common.c
 create mode 100644 lib/librte_ip_frag/rte_ipfrag_version.map
 create mode 100644 lib/librte_ip_frag/rte_ipv4_fragmentation.c
 create mode 100644 lib/librte_ip_frag/rte_ipv4_reassembly.c
 create mode 100644 lib/librte_ip_frag/rte_ipv6_fragmentation.c
 create mode 100644 lib/librte_ip_frag/rte_ipv6_reassembly.c
 create mode 100644 lib/librte_ivshmem/Makefile
 create mode 100644 lib/librte_ivshmem/rte_ivshmem.c
 create mode 100644 lib/librte_ivshmem/rte_ivshmem.h
 create mode 100644 lib/librte_ivshmem/rte_ivshmem_version.map
 create mode 100644 lib/librte_jobstats/Makefile
 create mode 100644 lib/librte_jobstats/rte_jobstats.c
 create mode 100644 lib/librte_jobstats/rte_jobstats.h
 create mode 100644 lib/librte_jobstats/rte_jobstats_version.map
 create mode 100644 lib/librte_kni/Makefile
 create mode 100644 lib/librte_kni/rte_kni.c
 create mode 100644 lib/librte_kni/rte_kni.h
 create mode 100644 lib/librte_kni/rte_kni_fifo.h
 create mode 100644 lib/librte_kni/rte_kni_version.map
 create mode 100644 lib/librte_kvargs/Makefile
 create mode 100644 lib/librte_kvargs/rte_kvargs.c
 create mode 100644 lib/librte_kvargs/rte_kvargs.h
 create mode 100644 lib/librte_kvargs/rte_kvargs_version.map
 create mode 100644 lib/librte_lpm/Makefile
 create mode 100644 lib/librte_lpm/rte_lpm.c
 create mode 100644 lib/librte_lpm/rte_lpm.h
 create mode 100644 lib/librte_lpm/rte_lpm6.c
 create mode 100644 lib/librte_lpm/rte_lpm6.h
 create mode 100644 lib/librte_lpm/rte_lpm_neon.h
 create mode 100644 lib/librte_lpm/rte_lpm_sse.h
 create mode 100644 lib/librte_lpm/rte_lpm_version.map
 create mode 100644 lib/librte_mbuf/Makefile
 create mode 100644 lib/librte_mbuf/rte_mbuf.c
 create mode 100644 lib/librte_mbuf/rte_mbuf.h
 create mode 100644 lib/librte_mbuf/rte_mbuf_version.map
 create mode 100644 lib/librte_mempool/Makefile
 create mode 100644 lib/librte_mempool/rte_dom0_mempool.c
 create mode 100644 lib/librte_mempool/rte_mempool.c
 create mode 100644 lib/librte_mempool/rte_mempool.h
 create mode 100644 lib/librte_mempool/rte_mempool_version.map
 create mode 100644 lib/librte_meter/Makefile
 create mode 100644 lib/librte_meter/rte_meter.c
 create mode 100644 lib/librte_meter/rte_meter.h
 create mode 100644 lib/librte_meter/rte_meter_version.map
 create mode 100644 lib/librte_net/Makefile
 create mode 100644 lib/librte_net/rte_arp.h
 create mode 100644 lib/librte_net/rte_icmp.h
 create mode 100644 lib/librte_net/rte_ip.h
 create mode 100644 lib/librte_net/rte_sctp.h
 create mode 100644 lib/librte_net/rte_tcp.h
 create mode 100644 lib/librte_net/rte_udp.h
 create mode 100644 lib/librte_pipeline/Makefile
 create mode 100644 lib/librte_pipeline/rte_pipeline.c
 create mode 100644 lib/librte_pipeline/rte_pipeline.h
 create mode 100644 lib/librte_pipeline/rte_pipeline_version.map
 create mode 100644 lib/librte_port/Makefile
 create mode 100644 lib/librte_port/rte_port.h
 create mode 100644 lib/librte_port/rte_port_ethdev.c
 create mode 100644 lib/librte_port/rte_port_ethdev.h
 create mode 100644 lib/librte_port/rte_port_frag.c
 create mode 100644 lib/librte_port/rte_port_frag.h
 create mode 100644 lib/librte_port/rte_port_ras.c
 create mode 100644 lib/librte_port/rte_port_ras.h
 create mode 100644 lib/librte_port/rte_port_ring.c
 create mode 100644 lib/librte_port/rte_port_ring.h
 create mode 100644 lib/librte_port/rte_port_sched.c
 create mode 100644 lib/librte_port/rte_port_sched.h
 create mode 100644 lib/librte_port/rte_port_source_sink.c
 create mode 100644 lib/librte_port/rte_port_source_sink.h
 create mode 100644 lib/librte_port/rte_port_version.map
 create mode 100644 lib/librte_power/Makefile
 create mode 100644 lib/librte_power/channel_commands.h
 create mode 100644 lib/librte_power/guest_channel.c
 create mode 100644 lib/librte_power/guest_channel.h
 create mode 100644 lib/librte_power/rte_power.c
 create mode 100644 lib/librte_power/rte_power.h
 create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.c
 create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.h
 create mode 100644 lib/librte_power/rte_power_common.h
 create mode 100644 lib/librte_power/rte_power_kvm_vm.c
 create mode 100644 lib/librte_power/rte_power_kvm_vm.h
 create mode 100644 lib/librte_power/rte_power_version.map
 create mode 100644 lib/librte_reorder/Makefile
 create mode 100644 lib/librte_reorder/rte_reorder.c
 create mode 100644 lib/librte_reorder/rte_reorder.h
 create mode 100644 lib/librte_reorder/rte_reorder_version.map
 create mode 100644 lib/librte_ring/Makefile
 create mode 100644 lib/librte_ring/rte_ring.c
 create mode 100644 lib/librte_ring/rte_ring.h
 create mode 100644 lib/librte_ring/rte_ring_version.map
 create mode 100644 lib/librte_sched/Makefile
 create mode 100644 lib/librte_sched/rte_approx.c
 create mode 100644 lib/librte_sched/rte_approx.h
 create mode 100644 lib/librte_sched/rte_bitmap.h
 create mode 100644 lib/librte_sched/rte_reciprocal.c
 create mode 100644 lib/librte_sched/rte_reciprocal.h
 create mode 100644 lib/librte_sched/rte_red.c
 create mode 100644 lib/librte_sched/rte_red.h
 create mode 100644 lib/librte_sched/rte_sched.c
 create mode 100644 lib/librte_sched/rte_sched.h
 create mode 100644 lib/librte_sched/rte_sched_common.h
 create mode 100644 lib/librte_sched/rte_sched_version.map
 create mode 100644 lib/librte_table/Makefile
 create mode 100644 lib/librte_table/rte_lru.h
 create mode 100644 lib/librte_table/rte_table.h
 create mode 100644 lib/librte_table/rte_table_acl.c
 create mode 100644 lib/librte_table/rte_table_acl.h
 create mode 100644 lib/librte_table/rte_table_array.c
 create mode 100644 lib/librte_table/rte_table_array.h
 create mode 100644 lib/librte_table/rte_table_hash.h
 create mode 100644 lib/librte_table/rte_table_hash_ext.c
 create mode 100644 lib/librte_table/rte_table_hash_key16.c
 create mode 100644 lib/librte_table/rte_table_hash_key32.c
 create mode 100644 lib/librte_table/rte_table_hash_key8.c
 create mode 100644 lib/librte_table/rte_table_hash_lru.c
 create mode 100644 lib/librte_table/rte_table_lpm.c
 create mode 100644 lib/librte_table/rte_table_lpm.h
 create mode 100644 lib/librte_table/rte_table_lpm_ipv6.c
 create mode 100644 lib/librte_table/rte_table_lpm_ipv6.h
 create mode 100644 lib/librte_table/rte_table_stub.c
 create mode 100644 lib/librte_table/rte_table_stub.h
 create mode 100644 lib/librte_table/rte_table_version.map
 create mode 100644 lib/librte_timer/Makefile
 create mode 100644 lib/librte_timer/rte_timer.c
 create mode 100644 lib/librte_timer/rte_timer.h
 create mode 100644 lib/librte_timer/rte_timer_version.map
 create mode 100644 lib/librte_vhost/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/Makefile
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c
 create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h
 create mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py
 create mode 100644 lib/librte_vhost/rte_vhost_version.map
 create mode 100644 lib/librte_vhost/rte_virtio_net.h
 create mode 100644 lib/librte_vhost/vhost-net.h
 create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c
 create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h
 create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
 create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
 create mode 100644 lib/librte_vhost/vhost_rxtx.c
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
 create mode 100644 lib/librte_vhost/vhost_user/fd_man.h
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
 create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
 create mode 100644 lib/librte_vhost/virtio-net.c
 create mode 100644 lib/librte_vhost/virtio-net.h

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 00000000..f254dba8
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,66 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-y += librte_compat
+DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
+DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
+DIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += librte_mempool
+DIRS-$(CONFIG_RTE_LIBRTE_MBUF) += librte_mbuf
+DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer
+DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile
+DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
+DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
+DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
+DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
+DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
+DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
+DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats
+DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
+DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
+DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
+DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
+DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
+DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
+DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
+DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
+DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
+
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
+DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
+endif
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
new file mode 100644
index 00000000..2e394c97
--- /dev/null
+++ b/lib/librte_acl/Makefile
@@ -0,0 +1,96 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_acl.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_acl_version.map
+
+LIBABIVER := 2
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += tb_mem.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += rte_acl.c
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
+
+ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
+CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
+else
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c
+#check if flag for SSE4.1 is already on, if not set it up manually
+	ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_1,$(CFLAGS)),)
+		CFLAGS_acl_run_sse.o += -msse4.1
+	endif
+endif
+
+#
+# If the compiler supports AVX2 instructions,
+# then add support for AVX2 classify method.
+#
+
+#check if flag for AVX2 is already on, if not set it up manually
+ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+	CC_AVX2_SUPPORT=1
+else
+	CC_AVX2_SUPPORT=\
+	$(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
+	grep -q AVX2 && echo 1)
+	ifeq ($(CC_AVX2_SUPPORT), 1)
+		ifeq ($(CC), icc)
+		CFLAGS_acl_run_avx2.o += -march=core-avx2
+		else
+		CFLAGS_acl_run_avx2.o += -mavx2
+		endif
+	endif
+endif
+
+ifeq ($(CC_AVX2_SUPPORT), 1)
+	SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_avx2.c
+	CFLAGS_rte_acl.o += -DCC_AVX2_SUPPORT
+endif
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include := rte_acl_osdep.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_ACL) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
new file mode 100644
index 00000000..09d67841
--- /dev/null
+++ b/lib/librte_acl/acl.h
@@ -0,0 +1,241 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_ACL_H_
+#define	_ACL_H_
+
+#ifdef __cplusplus
+extern"C" {
+#endif /* __cplusplus */
+
+#define RTE_ACL_QUAD_MAX	5
+#define RTE_ACL_QUAD_SIZE	4
+#define RTE_ACL_QUAD_SINGLE	UINT64_C(0x7f7f7f7f00000000)
+
+#define RTE_ACL_SINGLE_TRIE_SIZE	2000
+
+#define RTE_ACL_DFA_MAX		UINT8_MAX
+#define RTE_ACL_DFA_SIZE	(UINT8_MAX + 1)
+
+#define	RTE_ACL_DFA_GR64_SIZE	64
+#define	RTE_ACL_DFA_GR64_NUM	(RTE_ACL_DFA_SIZE / RTE_ACL_DFA_GR64_SIZE)
+#define	RTE_ACL_DFA_GR64_BIT	\
+	(CHAR_BIT * sizeof(uint32_t) / RTE_ACL_DFA_GR64_NUM)
+
+typedef int bits_t;
+
+#define	RTE_ACL_BIT_SET_SIZE	((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT))
+
+struct rte_acl_bitset {
+	bits_t             bits[RTE_ACL_BIT_SET_SIZE];
+};
+
+#define	RTE_ACL_NODE_DFA	(0 << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_SINGLE	(1U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_QRANGE	(3U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_MATCH	(4U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_TYPE	(7U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_UNDEFINED	UINT32_MAX
+
+/*
+ * ACL RT structure is a set of multibit tries (with stride == 8)
+ * represented by an array of transitions. The next position is calculated
+ * based on the current position and the input byte.
+ * Each transition is 64 bit value with the following format:
+ * | node_type_specific : 32 | node_type : 3 | node_addr : 29 |
+ * For all node types except RTE_ACL_NODE_MATCH, node_addr is an index
+ * to the start of the node in the transtions array.
+ * Few different node types are used:
+ * RTE_ACL_NODE_MATCH:
+ * node_addr value is and index into an array that contains the return value
+ * and its priority for each category.
+ * Upper 32 bits of the transition value are not used for that node type.
+ * RTE_ACL_NODE_QRANGE:
+ * that node consist of up to 5 transitions.
+ * Upper 32 bits are interpreted as 4 signed character values which
+ * are ordered from smallest(INT8_MIN) to largest (INT8_MAX).
+ * These values define 5 ranges:
+ * INT8_MIN <= range[0]  <= ((int8_t *)&transition)[4]
+ * ((int8_t *)&transition)[4] < range[1] <= ((int8_t *)&transition)[5]
+ * ((int8_t *)&transition)[5] < range[2] <= ((int8_t *)&transition)[6]
+ * ((int8_t *)&transition)[6] < range[3] <= ((int8_t *)&transition)[7]
+ * ((int8_t *)&transition)[7] < range[4] <= INT8_MAX
+ * So for input byte value within range[i] i-th transition within that node
+ * will be used.
+ * RTE_ACL_NODE_SINGLE:
+ * always transitions to the same node regardless of the input value.
+ * RTE_ACL_NODE_DFA:
+ * that node consits of up to 256 transitions.
+ * In attempt to conserve space all transitions are divided into 4 consecutive
+ * groups, by 64 transitions per group:
+ * group64[i] contains transitions[i * 64, .. i * 64 + 63].
+ * Upper 32 bits are interpreted as 4 unsigned character values one per group,
+ * which contain index to the start of the given group within the node.
+ * So to calculate transition index within the node for given input byte value:
+ * input_byte - ((uint8_t *)&transition)[4 + input_byte / 64].
+ */
+
+/*
+ * Structure of a node is a set of ptrs and each ptr has a bit map
+ * of values associated with this transition.
+ */
+struct rte_acl_ptr_set {
+	struct rte_acl_bitset values;	/* input values associated with ptr */
+	struct rte_acl_node  *ptr;	/* transition to next node */
+};
+
+struct rte_acl_classifier_results {
+	int results[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_match_results {
+	uint32_t results[RTE_ACL_MAX_CATEGORIES];
+	int32_t priority[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_node {
+	uint64_t node_index;  /* index for this node */
+	uint32_t level;       /* level 0-n in the trie */
+	uint32_t ref_count;   /* ref count for this node */
+	struct rte_acl_bitset  values;
+	/* set of all values that map to another node
+	 * (union of bits in each transition.
+	 */
+	uint32_t                num_ptrs; /* number of ptr_set in use */
+	uint32_t                max_ptrs; /* number of allocated ptr_set */
+	uint32_t                min_add;  /* number of ptr_set per allocation */
+	struct rte_acl_ptr_set *ptrs;     /* transitions array for this node */
+	int32_t                 match_flag;
+	int32_t                 match_index; /* index to match data */
+	uint32_t                node_type;
+	int32_t                 fanout;
+	/* number of ranges (transitions w/ consecutive bits) */
+	int32_t                 id;
+	struct rte_acl_match_results *mrt; /* only valid when match_flag != 0 */
+	union {
+		char            transitions[RTE_ACL_QUAD_SIZE];
+		/* boundaries for ranged node */
+		uint8_t         dfa_gr64[RTE_ACL_DFA_GR64_NUM];
+	};
+	struct rte_acl_node     *next;
+	/* free list link or pointer to duplicate node during merge */
+	struct rte_acl_node     *prev;
+	/* points to node from which this node was duplicated */
+};
+
+/*
+ * Types of tries used to generate runtime structure(s)
+ */
+enum {
+	RTE_ACL_FULL_TRIE = 0,
+	RTE_ACL_NOSRC_TRIE = 1,
+	RTE_ACL_NODST_TRIE = 2,
+	RTE_ACL_NOPORTS_TRIE = 4,
+	RTE_ACL_NOVLAN_TRIE = 8,
+	RTE_ACL_UNUSED_TRIE = 0x80000000
+};
+
+
+/** MAX number of tries per one ACL context.*/
+#define RTE_ACL_MAX_TRIES	8
+
+/** Max number of characters in PM name.*/
+#define RTE_ACL_NAMESIZE	32
+
+
+struct rte_acl_trie {
+	uint32_t        type;
+	uint32_t        count;
+	uint32_t        root_index;
+	const uint32_t *data_index;
+	uint32_t        num_data_indexes;
+};
+
+struct rte_acl_bld_trie {
+	struct rte_acl_node *trie;
+};
+
+struct rte_acl_ctx {
+	char                name[RTE_ACL_NAMESIZE];
+	/** Name of the ACL context. */
+	int32_t             socket_id;
+	/** Socket ID to allocate memory from. */
+	enum rte_acl_classify_alg alg;
+	void               *rules;
+	uint32_t            max_rules;
+	uint32_t            rule_sz;
+	uint32_t            num_rules;
+	uint32_t            num_categories;
+	uint32_t            num_tries;
+	uint32_t            match_index;
+	uint64_t            no_match;
+	uint64_t            idle;
+	uint64_t           *trans_table;
+	uint32_t           *data_indexes;
+	struct rte_acl_trie trie[RTE_ACL_MAX_TRIES];
+	void               *mem;
+	size_t              mem_sz;
+	struct rte_acl_config config; /* copy of build config. */
+};
+
+int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+	uint32_t num_categories, uint32_t data_index_sz, size_t max_size);
+
+typedef int (*rte_acl_classify_t)
+(const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t);
+
+/*
+ * Different implementations of ACL classify.
+ */
+int
+rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ACL_H_ */
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
new file mode 100644
index 00000000..9e5ad1c6
--- /dev/null
+++ b/lib/librte_acl/acl_bld.c
@@ -0,0 +1,1598 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "tb_mem.h"
+#include "acl.h"
+
+#define	ACL_POOL_ALIGN		8
+#define	ACL_POOL_ALLOC_MIN	0x800000
+
+/* number of pointers per alloc */
+#define ACL_PTR_ALLOC	32
+
+/* macros for dividing rule sets heuristics */
+#define NODE_MAX	0x4000
+#define NODE_MIN	0x800
+
+/* TALLY are statistics per field */
+enum {
+	TALLY_0 = 0,        /* number of rules that are 0% or more wild. */
+	TALLY_25,	    /* number of rules that are 25% or more wild. */
+	TALLY_50,
+	TALLY_75,
+	TALLY_100,
+	TALLY_DEACTIVATED, /* deactivated fields (100% wild in all rules). */
+	TALLY_DEPTH,
+	/* number of rules that are 100% wild for this field and higher. */
+	TALLY_NUM
+};
+
+static const uint32_t wild_limits[TALLY_DEACTIVATED] = {0, 25, 50, 75, 100};
+
+enum {
+	ACL_INTERSECT_NONE = 0,
+	ACL_INTERSECT_A = 1,    /* set A is a superset of A and B intersect */
+	ACL_INTERSECT_B = 2,    /* set B is a superset of A and B intersect */
+	ACL_INTERSECT = 4,	/* sets A and B intersect */
+};
+
+enum {
+	ACL_PRIORITY_EQUAL = 0,
+	ACL_PRIORITY_NODE_A = 1,
+	ACL_PRIORITY_NODE_B = 2,
+	ACL_PRIORITY_MIXED = 3
+};
+
+
+struct acl_mem_block {
+	uint32_t block_size;
+	void     *mem_ptr;
+};
+
+#define	MEM_BLOCK_NUM	16
+
+/* Single ACL rule, build representation.*/
+struct rte_acl_build_rule {
+	struct rte_acl_build_rule   *next;
+	struct rte_acl_config       *config;
+	/**< configuration for each field in the rule. */
+	const struct rte_acl_rule   *f;
+	uint32_t                    *wildness;
+};
+
+/* Context for build phase */
+struct acl_build_context {
+	const struct rte_acl_ctx *acx;
+	struct rte_acl_build_rule *build_rules;
+	struct rte_acl_config     cfg;
+	int32_t                   node_max;
+	int32_t                   cur_node_max;
+	uint32_t                  node;
+	uint32_t                  num_nodes;
+	uint32_t                  category_mask;
+	uint32_t                  num_rules;
+	uint32_t                  node_id;
+	uint32_t                  src_mask;
+	uint32_t                  num_build_rules;
+	uint32_t                  num_tries;
+	struct tb_mem_pool        pool;
+	struct rte_acl_trie       tries[RTE_ACL_MAX_TRIES];
+	struct rte_acl_bld_trie   bld_tries[RTE_ACL_MAX_TRIES];
+	uint32_t            data_indexes[RTE_ACL_MAX_TRIES][RTE_ACL_MAX_FIELDS];
+
+	/* memory free lists for nodes and blocks used for node ptrs */
+	struct acl_mem_block      blocks[MEM_BLOCK_NUM];
+	struct rte_acl_node       *node_free_list;
+};
+
+static int acl_merge_trie(struct acl_build_context *context,
+	struct rte_acl_node *node_a, struct rte_acl_node *node_b,
+	uint32_t level, struct rte_acl_node **node_c);
+
+static void
+acl_deref_ptr(struct acl_build_context *context,
+	struct rte_acl_node *node, int index);
+
+static void *
+acl_build_alloc(struct acl_build_context *context, size_t n, size_t s)
+{
+	uint32_t m;
+	void *p;
+	size_t alloc_size = n * s;
+
+	/*
+	 * look for memory in free lists
+	 */
+	for (m = 0; m < RTE_DIM(context->blocks); m++) {
+		if (context->blocks[m].block_size ==
+		   alloc_size && context->blocks[m].mem_ptr != NULL) {
+			p = context->blocks[m].mem_ptr;
+			context->blocks[m].mem_ptr = *((void **)p);
+			memset(p, 0, alloc_size);
+			return p;
+		}
+	}
+
+	/*
+	 * return allocation from memory pool
+	 */
+	p = tb_alloc(&context->pool, alloc_size);
+	return p;
+}
+
+/*
+ * Free memory blocks (kept in context for reuse).
+ */
+static void
+acl_build_free(struct acl_build_context *context, size_t s, void *p)
+{
+	uint32_t n;
+
+	for (n = 0; n < RTE_DIM(context->blocks); n++) {
+		if (context->blocks[n].block_size == s) {
+			*((void **)p) = context->blocks[n].mem_ptr;
+			context->blocks[n].mem_ptr = p;
+			return;
+		}
+	}
+	for (n = 0; n < RTE_DIM(context->blocks); n++) {
+		if (context->blocks[n].block_size == 0) {
+			context->blocks[n].block_size = s;
+			*((void **)p) = NULL;
+			context->blocks[n].mem_ptr = p;
+			return;
+		}
+	}
+}
+
+/*
+ * Allocate and initialize a new node.
+ */
+static struct rte_acl_node *
+acl_alloc_node(struct acl_build_context *context, int level)
+{
+	struct rte_acl_node *node;
+
+	if (context->node_free_list != NULL) {
+		node = context->node_free_list;
+		context->node_free_list = node->next;
+		memset(node, 0, sizeof(struct rte_acl_node));
+	} else {
+		node = acl_build_alloc(context, sizeof(struct rte_acl_node), 1);
+	}
+
+	if (node != NULL) {
+		node->num_ptrs = 0;
+		node->level = level;
+		node->node_type = RTE_ACL_NODE_UNDEFINED;
+		node->node_index = RTE_ACL_NODE_UNDEFINED;
+		context->num_nodes++;
+		node->id = context->node_id++;
+	}
+	return node;
+}
+
+/*
+ * Dereference all nodes to which this node points
+ */
+static void
+acl_free_node(struct acl_build_context *context,
+	struct rte_acl_node *node)
+{
+	uint32_t n;
+
+	if (node->prev != NULL)
+		node->prev->next = NULL;
+	for (n = 0; n < node->num_ptrs; n++)
+		acl_deref_ptr(context, node, n);
+
+	/* free mrt if this is a match node */
+	if (node->mrt != NULL) {
+		acl_build_free(context, sizeof(struct rte_acl_match_results),
+			node->mrt);
+		node->mrt = NULL;
+	}
+
+	/* free transitions to other nodes */
+	if (node->ptrs != NULL) {
+		acl_build_free(context,
+			node->max_ptrs * sizeof(struct rte_acl_ptr_set),
+			node->ptrs);
+		node->ptrs = NULL;
+	}
+
+	/* put it on the free list */
+	context->num_nodes--;
+	node->next = context->node_free_list;
+	context->node_free_list = node;
+}
+
+
+/*
+ * Include src bitset in dst bitset
+ */
+static void
+acl_include(struct rte_acl_bitset *dst, struct rte_acl_bitset *src, bits_t mask)
+{
+	uint32_t n;
+
+	for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+		dst->bits[n] = (dst->bits[n] & mask) | src->bits[n];
+}
+
+/*
+ * Set dst to bits of src1 that are not in src2
+ */
+static int
+acl_exclude(struct rte_acl_bitset *dst,
+	struct rte_acl_bitset *src1,
+	struct rte_acl_bitset *src2)
+{
+	uint32_t n;
+	bits_t all_bits = 0;
+
+	for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) {
+		dst->bits[n] = src1->bits[n] & ~src2->bits[n];
+		all_bits |= dst->bits[n];
+	}
+	return all_bits != 0;
+}
+
+/*
+ * Add a pointer (ptr) to a node.
+ */
+static int
+acl_add_ptr(struct acl_build_context *context,
+	struct rte_acl_node *node,
+	struct rte_acl_node *ptr,
+	struct rte_acl_bitset *bits)
+{
+	uint32_t n, num_ptrs;
+	struct rte_acl_ptr_set *ptrs = NULL;
+
+	/*
+	 * If there's already a pointer to the same node, just add to the bitset
+	 */
+	for (n = 0; n < node->num_ptrs; n++) {
+		if (node->ptrs[n].ptr != NULL) {
+			if (node->ptrs[n].ptr == ptr) {
+				acl_include(&node->ptrs[n].values, bits, -1);
+				acl_include(&node->values, bits, -1);
+				return 0;
+			}
+		}
+	}
+
+	/* if there's no room for another pointer, make room */
+	if (node->num_ptrs >= node->max_ptrs) {
+		/* add room for more pointers */
+		num_ptrs = node->max_ptrs + ACL_PTR_ALLOC;
+		ptrs = acl_build_alloc(context, num_ptrs, sizeof(*ptrs));
+
+		/* copy current points to new memory allocation */
+		if (node->ptrs != NULL) {
+			memcpy(ptrs, node->ptrs,
+				node->num_ptrs * sizeof(*ptrs));
+			acl_build_free(context, node->max_ptrs * sizeof(*ptrs),
+				node->ptrs);
+		}
+		node->ptrs = ptrs;
+		node->max_ptrs = num_ptrs;
+	}
+
+	/* Find available ptr and add a new pointer to this node */
+	for (n = node->min_add; n < node->max_ptrs; n++) {
+		if (node->ptrs[n].ptr == NULL) {
+			node->ptrs[n].ptr = ptr;
+			acl_include(&node->ptrs[n].values, bits, 0);
+			acl_include(&node->values, bits, -1);
+			if (ptr != NULL)
+				ptr->ref_count++;
+			if (node->num_ptrs <= n)
+				node->num_ptrs = n + 1;
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Add a pointer for a range of values
+ */
+static int
+acl_add_ptr_range(struct acl_build_context *context,
+	struct rte_acl_node *root,
+	struct rte_acl_node *node,
+	uint8_t low,
+	uint8_t high)
+{
+	uint32_t n;
+	struct rte_acl_bitset bitset;
+
+	/* clear the bitset values */
+	for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+		bitset.bits[n] = 0;
+
+	/* for each bit in range, add bit to set */
+	for (n = 0; n < UINT8_MAX + 1; n++)
+		if (n >= low && n <= high)
+			bitset.bits[n / (sizeof(bits_t) * 8)] |=
+				1 << (n % (sizeof(bits_t) * 8));
+
+	return acl_add_ptr(context, root, node, &bitset);
+}
+
+/*
+ * Generate a bitset from a byte value and mask.
+ */
+static int
+acl_gen_mask(struct rte_acl_bitset *bitset, uint32_t value, uint32_t mask)
+{
+	int range = 0;
+	uint32_t n;
+
+	/* clear the bitset values */
+	for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+		bitset->bits[n] = 0;
+
+	/* for each bit in value/mask, add bit to set */
+	for (n = 0; n < UINT8_MAX + 1; n++) {
+		if ((n & mask) == value) {
+			range++;
+			bitset->bits[n / (sizeof(bits_t) * 8)] |=
+				1 << (n % (sizeof(bits_t) * 8));
+		}
+	}
+	return range;
+}
+
+/*
+ * Determine how A and B intersect.
+ * Determine if A and/or B are supersets of the intersection.
+ */
+static int
+acl_intersect_type(const struct rte_acl_bitset *a_bits,
+	const struct rte_acl_bitset *b_bits,
+	struct rte_acl_bitset *intersect)
+{
+	uint32_t n;
+	bits_t intersect_bits = 0;
+	bits_t a_superset = 0;
+	bits_t b_superset = 0;
+
+	/*
+	 * calculate and store intersection and check if A and/or B have
+	 * bits outside the intersection (superset)
+	 */
+	for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) {
+		intersect->bits[n] = a_bits->bits[n] & b_bits->bits[n];
+		a_superset |= a_bits->bits[n] ^ intersect->bits[n];
+		b_superset |= b_bits->bits[n] ^ intersect->bits[n];
+		intersect_bits |= intersect->bits[n];
+	}
+
+	n = (intersect_bits == 0 ? ACL_INTERSECT_NONE : ACL_INTERSECT) |
+		(b_superset == 0 ? 0 : ACL_INTERSECT_B) |
+		(a_superset == 0 ? 0 : ACL_INTERSECT_A);
+
+	return n;
+}
+
+/*
+ * Duplicate a node
+ */
+static struct rte_acl_node *
+acl_dup_node(struct acl_build_context *context, struct rte_acl_node *node)
+{
+	uint32_t n;
+	struct rte_acl_node *next;
+
+	next = acl_alloc_node(context, node->level);
+
+	/* allocate the pointers */
+	if (node->num_ptrs > 0) {
+		next->ptrs = acl_build_alloc(context,
+			node->max_ptrs,
+			sizeof(struct rte_acl_ptr_set));
+		next->max_ptrs = node->max_ptrs;
+	}
+
+	/* copy over the pointers */
+	for (n = 0; n < node->num_ptrs; n++) {
+		if (node->ptrs[n].ptr != NULL) {
+			next->ptrs[n].ptr = node->ptrs[n].ptr;
+			next->ptrs[n].ptr->ref_count++;
+			acl_include(&next->ptrs[n].values,
+				&node->ptrs[n].values, -1);
+		}
+	}
+
+	next->num_ptrs = node->num_ptrs;
+
+	/* copy over node's match results */
+	if (node->match_flag == 0)
+		next->match_flag = 0;
+	else {
+		next->match_flag = -1;
+		next->mrt = acl_build_alloc(context, 1, sizeof(*next->mrt));
+		memcpy(next->mrt, node->mrt, sizeof(*next->mrt));
+	}
+
+	/* copy over node's bitset */
+	acl_include(&next->values, &node->values, -1);
+
+	node->next = next;
+	next->prev = node;
+
+	return next;
+}
+
+/*
+ * Dereference a pointer from a node
+ */
+static void
+acl_deref_ptr(struct acl_build_context *context,
+	struct rte_acl_node *node, int index)
+{
+	struct rte_acl_node *ref_node;
+
+	/* De-reference the node at the specified pointer */
+	if (node != NULL && node->ptrs[index].ptr != NULL) {
+		ref_node = node->ptrs[index].ptr;
+		ref_node->ref_count--;
+		if (ref_node->ref_count == 0)
+			acl_free_node(context, ref_node);
+	}
+}
+
+/*
+ * acl_exclude rte_acl_bitset from src and copy remaining pointer to dst
+ */
+static int
+acl_copy_ptr(struct acl_build_context *context,
+	struct rte_acl_node *dst,
+	struct rte_acl_node *src,
+	int index,
+	struct rte_acl_bitset *b_bits)
+{
+	int rc;
+	struct rte_acl_bitset bits;
+
+	if (b_bits != NULL)
+		if (!acl_exclude(&bits, &src->ptrs[index].values, b_bits))
+			return 0;
+
+	rc = acl_add_ptr(context, dst, src->ptrs[index].ptr, &bits);
+	if (rc < 0)
+		return rc;
+	return 1;
+}
+
+/*
+ * Fill in gaps in ptrs list with the ptr at the end of the list
+ */
+static void
+acl_compact_node_ptrs(struct rte_acl_node *node_a)
+{
+	uint32_t n;
+	int min_add = node_a->min_add;
+
+	while (node_a->num_ptrs > 0  &&
+			node_a->ptrs[node_a->num_ptrs - 1].ptr == NULL)
+		node_a->num_ptrs--;
+
+	for (n = min_add; n + 1 < node_a->num_ptrs; n++) {
+
+		/* if this entry is empty */
+		if (node_a->ptrs[n].ptr == NULL) {
+
+			/* move the last pointer to this entry */
+			acl_include(&node_a->ptrs[n].values,
+				&node_a->ptrs[node_a->num_ptrs - 1].values,
+				0);
+			node_a->ptrs[n].ptr =
+				node_a->ptrs[node_a->num_ptrs - 1].ptr;
+
+			/*
+			 * mark the end as empty and adjust the number
+			 * of used pointer enum_tries
+			 */
+			node_a->ptrs[node_a->num_ptrs - 1].ptr = NULL;
+			while (node_a->num_ptrs > 0  &&
+				node_a->ptrs[node_a->num_ptrs - 1].ptr == NULL)
+				node_a->num_ptrs--;
+		}
+	}
+}
+
+static int
+acl_resolve_leaf(struct acl_build_context *context,
+	struct rte_acl_node *node_a,
+	struct rte_acl_node *node_b,
+	struct rte_acl_node **node_c)
+{
+	uint32_t n;
+	int combined_priority = ACL_PRIORITY_EQUAL;
+
+	for (n = 0; n < context->cfg.num_categories; n++) {
+		if (node_a->mrt->priority[n] != node_b->mrt->priority[n]) {
+			combined_priority |= (node_a->mrt->priority[n] >
+				node_b->mrt->priority[n]) ?
+				ACL_PRIORITY_NODE_A : ACL_PRIORITY_NODE_B;
+		}
+	}
+
+	/*
+	 * if node a is higher or equal priority for all categories,
+	 * then return node_a.
+	 */
+	if (combined_priority == ACL_PRIORITY_NODE_A ||
+			combined_priority == ACL_PRIORITY_EQUAL) {
+		*node_c = node_a;
+		return 0;
+	}
+
+	/*
+	 * if node b is higher or equal priority for all categories,
+	 * then return node_b.
+	 */
+	if (combined_priority == ACL_PRIORITY_NODE_B) {
+		*node_c = node_b;
+		return 0;
+	}
+
+	/*
+	 * mixed priorities - create a new node with the highest priority
+	 * for each category.
+	 */
+
+	/* force new duplication. */
+	node_a->next = NULL;
+
+	*node_c = acl_dup_node(context, node_a);
+	for (n = 0; n < context->cfg.num_categories; n++) {
+		if ((*node_c)->mrt->priority[n] < node_b->mrt->priority[n]) {
+			(*node_c)->mrt->priority[n] = node_b->mrt->priority[n];
+			(*node_c)->mrt->results[n] = node_b->mrt->results[n];
+		}
+	}
+	return 0;
+}
+
+/*
+ * Merge nodes A and B together,
+ *   returns a node that is the path for the intersection
+ *
+ * If match node (leaf on trie)
+ *	For each category
+ *		return node = highest priority result
+ *
+ * Create C as a duplicate of A to point to child intersections
+ * If any pointers in C intersect with any in B
+ *	For each intersection
+ *		merge children
+ *		remove intersection from C pointer
+ *		add a pointer from C to child intersection node
+ * Compact the pointers in A and B
+ * Copy any B pointers that are outside of the intersection to C
+ * If C has no references to the B trie
+ *   free C and return A
+ * Else If C has no references to the A trie
+ *   free C and return B
+ * Else
+ *   return C
+ */
+static int
+acl_merge_trie(struct acl_build_context *context,
+	struct rte_acl_node *node_a, struct rte_acl_node *node_b,
+	uint32_t level, struct rte_acl_node **return_c)
+{
+	uint32_t n, m, ptrs_c, ptrs_b;
+	uint32_t min_add_c, min_add_b;
+	int node_intersect_type;
+	struct rte_acl_bitset node_intersect;
+	struct rte_acl_node *node_c;
+	struct rte_acl_node *node_a_next;
+	int node_b_refs;
+	int node_a_refs;
+
+	node_c = node_a;
+	node_a_next = node_a->next;
+	min_add_c = 0;
+	min_add_b = 0;
+	node_a_refs = node_a->num_ptrs;
+	node_b_refs = 0;
+	node_intersect_type = 0;
+
+	/* Resolve leaf nodes (matches) */
+	if (node_a->match_flag != 0) {
+		acl_resolve_leaf(context, node_a, node_b, return_c);
+		return 0;
+	}
+
+	/*
+	 * Create node C as a copy of node A, and do: C = merge(A,B);
+	 * If node A can be used instead (A==C), then later we'll
+	 * destroy C and return A.
+	 */
+	if (level > 0)
+		node_c = acl_dup_node(context, node_a);
+
+	/*
+	 * If the two node transitions intersect then merge the transitions.
+	 * Check intersection for entire node (all pointers)
+	 */
+	node_intersect_type = acl_intersect_type(&node_c->values,
+		&node_b->values,
+		&node_intersect);
+
+	if (node_intersect_type & ACL_INTERSECT) {
+
+		min_add_b = node_b->min_add;
+		node_b->min_add = node_b->num_ptrs;
+		ptrs_b = node_b->num_ptrs;
+
+		min_add_c = node_c->min_add;
+		node_c->min_add = node_c->num_ptrs;
+		ptrs_c = node_c->num_ptrs;
+
+		for (n = 0; n < ptrs_c; n++) {
+			if (node_c->ptrs[n].ptr == NULL) {
+				node_a_refs--;
+				continue;
+			}
+			node_c->ptrs[n].ptr->next = NULL;
+			for (m = 0; m < ptrs_b; m++) {
+
+				struct rte_acl_bitset child_intersect;
+				int child_intersect_type;
+				struct rte_acl_node *child_node_c = NULL;
+
+				if (node_b->ptrs[m].ptr == NULL ||
+						node_c->ptrs[n].ptr ==
+						node_b->ptrs[m].ptr)
+						continue;
+
+				child_intersect_type = acl_intersect_type(
+					&node_c->ptrs[n].values,
+					&node_b->ptrs[m].values,
+					&child_intersect);
+
+				if ((child_intersect_type & ACL_INTERSECT) !=
+						0) {
+					if (acl_merge_trie(context,
+							node_c->ptrs[n].ptr,
+							node_b->ptrs[m].ptr,
+							level + 1,
+							&child_node_c))
+						return 1;
+
+					if (child_node_c != NULL &&
+							child_node_c !=
+							node_c->ptrs[n].ptr) {
+
+						node_b_refs++;
+
+						/*
+						 * Added link from C to
+						 * child_C for all transitions
+						 * in the intersection.
+						 */
+						acl_add_ptr(context, node_c,
+							child_node_c,
+							&child_intersect);
+
+						/*
+						 * inc refs if pointer is not
+						 * to node b.
+						 */
+						node_a_refs += (child_node_c !=
+							node_b->ptrs[m].ptr);
+
+						/*
+						 * Remove intersection from C
+						 * pointer.
+						 */
+						if (!acl_exclude(
+							&node_c->ptrs[n].values,
+							&node_c->ptrs[n].values,
+							&child_intersect)) {
+							acl_deref_ptr(context,
+								node_c, n);
+							node_c->ptrs[n].ptr =
+								NULL;
+							node_a_refs--;
+						}
+					}
+				}
+			}
+		}
+
+		/* Compact pointers */
+		node_c->min_add = min_add_c;
+		acl_compact_node_ptrs(node_c);
+		node_b->min_add = min_add_b;
+		acl_compact_node_ptrs(node_b);
+	}
+
+	/*
+	 *  Copy pointers outside of the intersection from B to C
+	 */
+	if ((node_intersect_type & ACL_INTERSECT_B) != 0) {
+		node_b_refs++;
+		for (m = 0; m < node_b->num_ptrs; m++)
+			if (node_b->ptrs[m].ptr != NULL)
+				acl_copy_ptr(context, node_c,
+					node_b, m, &node_intersect);
+	}
+
+	/*
+	 * Free node C if top of trie is contained in A or B
+	 *  if node C is a duplicate of node A &&
+	 *     node C was not an existing duplicate
+	 */
+	if (node_c != node_a && node_c != node_a_next) {
+
+		/*
+		 * if the intersection has no references to the
+		 * B side, then it is contained in A
+		 */
+		if (node_b_refs == 0) {
+			acl_free_node(context, node_c);
+			node_c = node_a;
+		} else {
+			/*
+			 * if the intersection has no references to the
+			 * A side, then it is contained in B.
+			 */
+			if (node_a_refs == 0) {
+				acl_free_node(context, node_c);
+				node_c = node_b;
+			}
+		}
+	}
+
+	if (return_c != NULL)
+		*return_c = node_c;
+
+	if (level == 0)
+		acl_free_node(context, node_b);
+
+	return 0;
+}
+
+/*
+ * Reset current runtime fields before next build:
+ *  - free allocated RT memory.
+ *  - reset all RT related fields to zero.
+ */
+static void
+acl_build_reset(struct rte_acl_ctx *ctx)
+{
+	rte_free(ctx->mem);
+	memset(&ctx->num_categories, 0,
+		sizeof(*ctx) - offsetof(struct rte_acl_ctx, num_categories));
+}
+
+static void
+acl_gen_range(struct acl_build_context *context,
+	const uint8_t *hi, const uint8_t *lo, int size, int level,
+	struct rte_acl_node *root, struct rte_acl_node *end)
+{
+	struct rte_acl_node *node, *prev;
+	uint32_t n;
+
+	prev = root;
+	for (n = size - 1; n > 0; n--) {
+		node = acl_alloc_node(context, level++);
+		acl_add_ptr_range(context, prev, node, lo[n], hi[n]);
+		prev = node;
+	}
+	acl_add_ptr_range(context, prev, end, lo[0], hi[0]);
+}
+
+static struct rte_acl_node *
+acl_gen_range_trie(struct acl_build_context *context,
+	const void *min, const void *max,
+	int size, int level, struct rte_acl_node **pend)
+{
+	int32_t n;
+	struct rte_acl_node *root;
+	const uint8_t *lo = (const uint8_t *)min;
+	const uint8_t *hi = (const uint8_t *)max;
+
+	*pend = acl_alloc_node(context, level+size);
+	root = acl_alloc_node(context, level++);
+
+	if (lo[size - 1] == hi[size - 1]) {
+		acl_gen_range(context, hi, lo, size, level, root, *pend);
+	} else {
+		uint8_t limit_lo[64];
+		uint8_t limit_hi[64];
+		uint8_t hi_ff = UINT8_MAX;
+		uint8_t lo_00 = 0;
+
+		memset(limit_lo, 0, RTE_DIM(limit_lo));
+		memset(limit_hi, UINT8_MAX, RTE_DIM(limit_hi));
+
+		for (n = size - 2; n >= 0; n--) {
+			hi_ff = (uint8_t)(hi_ff & hi[n]);
+			lo_00 = (uint8_t)(lo_00 | lo[n]);
+		}
+
+		if (hi_ff != UINT8_MAX) {
+			limit_lo[size - 1] = hi[size - 1];
+			acl_gen_range(context, hi, limit_lo, size, level,
+				root, *pend);
+		}
+
+		if (lo_00 != 0) {
+			limit_hi[size - 1] = lo[size - 1];
+			acl_gen_range(context, limit_hi, lo, size, level,
+				root, *pend);
+		}
+
+		if (hi[size - 1] - lo[size - 1] > 1 ||
+				lo_00 == 0 ||
+				hi_ff == UINT8_MAX) {
+			limit_lo[size-1] = (uint8_t)(lo[size-1] + (lo_00 != 0));
+			limit_hi[size-1] = (uint8_t)(hi[size-1] -
+				(hi_ff != UINT8_MAX));
+			acl_gen_range(context, limit_hi, limit_lo, size,
+				level, root, *pend);
+		}
+	}
+	return root;
+}
+
+static struct rte_acl_node *
+acl_gen_mask_trie(struct acl_build_context *context,
+	const void *value, const void *mask,
+	int size, int level, struct rte_acl_node **pend)
+{
+	int32_t n;
+	struct rte_acl_node *root;
+	struct rte_acl_node *node, *prev;
+	struct rte_acl_bitset bits;
+	const uint8_t *val = (const uint8_t *)value;
+	const uint8_t *msk = (const uint8_t *)mask;
+
+	root = acl_alloc_node(context, level++);
+	prev = root;
+
+	for (n = size - 1; n >= 0; n--) {
+		node = acl_alloc_node(context, level++);
+		acl_gen_mask(&bits, val[n] & msk[n], msk[n]);
+		acl_add_ptr(context, prev, node, &bits);
+		prev = node;
+	}
+
+	*pend = prev;
+	return root;
+}
+
+static struct rte_acl_node *
+build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head,
+	struct rte_acl_build_rule **last, uint32_t *count)
+{
+	uint32_t n, m;
+	int field_index, node_count;
+	struct rte_acl_node *trie;
+	struct rte_acl_build_rule *prev, *rule;
+	struct rte_acl_node *end, *merge, *root, *end_prev;
+	const struct rte_acl_field *fld;
+
+	prev = head;
+	rule = head;
+	*last = prev;
+
+	trie = acl_alloc_node(context, 0);
+
+	while (rule != NULL) {
+
+		root = acl_alloc_node(context, 0);
+
+		root->ref_count = 1;
+		end = root;
+
+		for (n = 0; n < rule->config->num_fields; n++) {
+
+			field_index = rule->config->defs[n].field_index;
+			fld = rule->f->field + field_index;
+			end_prev = end;
+
+			/* build a mini-trie for this field */
+			switch (rule->config->defs[n].type) {
+
+			case RTE_ACL_FIELD_TYPE_BITMASK:
+				merge = acl_gen_mask_trie(context,
+					&fld->value,
+					&fld->mask_range,
+					rule->config->defs[n].size,
+					end->level + 1,
+					&end);
+				break;
+
+			case RTE_ACL_FIELD_TYPE_MASK:
+			{
+				/*
+				 * set msb for the size of the field and
+				 * all higher bits.
+				 */
+				uint64_t mask;
+				mask = RTE_ACL_MASKLEN_TO_BITMASK(
+					fld->mask_range.u32,
+					rule->config->defs[n].size);
+
+				/* gen a mini-trie for this field */
+				merge = acl_gen_mask_trie(context,
+					&fld->value,
+					(char *)&mask,
+					rule->config->defs[n].size,
+					end->level + 1,
+					&end);
+			}
+			break;
+
+			case RTE_ACL_FIELD_TYPE_RANGE:
+				merge = acl_gen_range_trie(context,
+					&rule->f->field[field_index].value,
+					&rule->f->field[field_index].mask_range,
+					rule->config->defs[n].size,
+					end->level + 1,
+					&end);
+				break;
+
+			default:
+				RTE_LOG(ERR, ACL,
+					"Error in rule[%u] type - %hhu\n",
+					rule->f->data.userdata,
+					rule->config->defs[n].type);
+				return NULL;
+			}
+
+			/* merge this field on to the end of the rule */
+			if (acl_merge_trie(context, end_prev, merge, 0,
+					NULL) != 0) {
+				return NULL;
+			}
+		}
+
+		end->match_flag = ++context->num_build_rules;
+
+		/*
+		 * Setup the results for this rule.
+		 * The result and priority of each category.
+		 */
+		if (end->mrt == NULL)
+			end->mrt = acl_build_alloc(context, 1,
+				sizeof(*end->mrt));
+
+		for (m = context->cfg.num_categories; 0 != m--; ) {
+			if (rule->f->data.category_mask & (1 << m)) {
+				end->mrt->results[m] = rule->f->data.userdata;
+				end->mrt->priority[m] = rule->f->data.priority;
+			} else {
+				end->mrt->results[m] = 0;
+				end->mrt->priority[m] = 0;
+			}
+		}
+
+		node_count = context->num_nodes;
+		(*count)++;
+
+		/* merge this rule into the trie */
+		if (acl_merge_trie(context, trie, root, 0, NULL))
+			return NULL;
+
+		node_count = context->num_nodes - node_count;
+		if (node_count > context->cur_node_max) {
+			*last = prev;
+			return trie;
+		}
+
+		prev = rule;
+		rule = rule->next;
+	}
+
+	*last = NULL;
+	return trie;
+}
+
+static void
+acl_calc_wildness(struct rte_acl_build_rule *head,
+	const struct rte_acl_config *config)
+{
+	uint32_t n;
+	struct rte_acl_build_rule *rule;
+
+	for (rule = head; rule != NULL; rule = rule->next) {
+
+		for (n = 0; n < config->num_fields; n++) {
+
+			double wild = 0;
+			uint32_t bit_len = CHAR_BIT * config->defs[n].size;
+			uint64_t msk_val = RTE_LEN2MASK(bit_len,
+				typeof(msk_val));
+			double size = bit_len;
+			int field_index = config->defs[n].field_index;
+			const struct rte_acl_field *fld = rule->f->field +
+				field_index;
+
+			switch (rule->config->defs[n].type) {
+			case RTE_ACL_FIELD_TYPE_BITMASK:
+				wild = (size - __builtin_popcountll(
+					fld->mask_range.u64 & msk_val)) /
+					size;
+				break;
+
+			case RTE_ACL_FIELD_TYPE_MASK:
+				wild = (size - fld->mask_range.u32) / size;
+				break;
+
+			case RTE_ACL_FIELD_TYPE_RANGE:
+				wild = (fld->mask_range.u64 & msk_val) -
+					(fld->value.u64 & msk_val);
+				wild = wild / msk_val;
+				break;
+			}
+
+			rule->wildness[field_index] = (uint32_t)(wild * 100);
+		}
+	}
+}
+
+static void
+acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config)
+{
+	struct rte_acl_build_rule *rule;
+	uint32_t n, m, fields_deactivated = 0;
+	uint32_t start = 0, deactivate = 0;
+	int tally[RTE_ACL_MAX_LEVELS][TALLY_NUM];
+
+	memset(tally, 0, sizeof(tally));
+
+	for (rule = head; rule != NULL; rule = rule->next) {
+
+		for (n = 0; n < config->num_fields; n++) {
+			uint32_t field_index = config->defs[n].field_index;
+
+			tally[n][TALLY_0]++;
+			for (m = 1; m < RTE_DIM(wild_limits); m++) {
+				if (rule->wildness[field_index] >=
+						wild_limits[m])
+					tally[n][m]++;
+			}
+		}
+
+		for (n = config->num_fields - 1; n > 0; n--) {
+			uint32_t field_index = config->defs[n].field_index;
+
+			if (rule->wildness[field_index] == 100)
+				tally[n][TALLY_DEPTH]++;
+			else
+				break;
+		}
+	}
+
+	/*
+	 * Look for any field that is always wild and drop it from the config
+	 * Only deactivate if all fields for a given input loop are deactivated.
+	 */
+	for (n = 1; n < config->num_fields; n++) {
+		if (config->defs[n].input_index !=
+				config->defs[n - 1].input_index) {
+			for (m = start; m < n; m++)
+				tally[m][TALLY_DEACTIVATED] = deactivate;
+			fields_deactivated += deactivate;
+			start = n;
+			deactivate = 1;
+		}
+
+		/* if the field is not always completely wild */
+		if (tally[n][TALLY_100] != tally[n][TALLY_0])
+			deactivate = 0;
+	}
+
+	for (m = start; m < n; m++)
+		tally[m][TALLY_DEACTIVATED] = deactivate;
+
+	fields_deactivated += deactivate;
+
+	/* remove deactivated fields */
+	if (fields_deactivated) {
+		uint32_t k, l = 0;
+
+		for (k = 0; k < config->num_fields; k++) {
+			if (tally[k][TALLY_DEACTIVATED] == 0) {
+				memmove(&tally[l][0], &tally[k][0],
+					TALLY_NUM * sizeof(tally[0][0]));
+				memmove(&config->defs[l++],
+					&config->defs[k],
+					sizeof(struct rte_acl_field_def));
+			}
+		}
+		config->num_fields = l;
+	}
+}
+
+static int
+rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2)
+{
+	uint32_t n;
+
+	for (n = 1; n < r1->config->num_fields; n++) {
+		int field_index = r1->config->defs[n].field_index;
+
+		if (r1->wildness[field_index] != r2->wildness[field_index])
+			return r1->wildness[field_index] -
+				r2->wildness[field_index];
+	}
+	return 0;
+}
+
+/*
+ * Split the rte_acl_build_rule list into two lists.
+ */
+static void
+rule_list_split(struct rte_acl_build_rule *source,
+	struct rte_acl_build_rule **list_a,
+	struct rte_acl_build_rule **list_b)
+{
+	struct rte_acl_build_rule *fast;
+	struct rte_acl_build_rule *slow;
+
+	if (source == NULL || source->next == NULL) {
+		/* length < 2 cases */
+		*list_a = source;
+		*list_b = NULL;
+	} else {
+		slow = source;
+		fast = source->next;
+		/* Advance 'fast' two nodes, and advance 'slow' one node */
+		while (fast != NULL) {
+			fast = fast->next;
+			if (fast != NULL) {
+				slow = slow->next;
+				fast = fast->next;
+			}
+		}
+		/* 'slow' is before the midpoint in the list, so split it in two
+		   at that point. */
+		*list_a = source;
+		*list_b = slow->next;
+		slow->next = NULL;
+	}
+}
+
+/*
+ * Merge two sorted lists.
+ */
+static struct rte_acl_build_rule *
+rule_list_sorted_merge(struct rte_acl_build_rule *a,
+	struct rte_acl_build_rule *b)
+{
+	struct rte_acl_build_rule *result = NULL;
+	struct rte_acl_build_rule **last_next = &result;
+
+	while (1) {
+		if (a == NULL) {
+			*last_next = b;
+			break;
+		} else if (b == NULL) {
+			*last_next = a;
+			break;
+		}
+		if (rule_cmp_wildness(a, b) >= 0) {
+			*last_next = a;
+			last_next = &a->next;
+			a = a->next;
+		} else {
+			*last_next = b;
+			last_next = &b->next;
+			b = b->next;
+		}
+	}
+	return result;
+}
+
+/*
+ * Sort list of rules based on the rules wildness.
+ * Use recursive mergesort algorithm.
+ */
+static struct rte_acl_build_rule *
+sort_rules(struct rte_acl_build_rule *head)
+{
+	struct rte_acl_build_rule *a;
+	struct rte_acl_build_rule *b;
+
+	/* Base case -- length 0 or 1 */
+	if (head == NULL || head->next == NULL)
+		return head;
+
+	/* Split head into 'a' and 'b' sublists */
+	rule_list_split(head, &a, &b);
+
+	/* Recursively sort the sublists */
+	a = sort_rules(a);
+	b = sort_rules(b);
+
+	/* answer = merge the two sorted lists together */
+	return rule_list_sorted_merge(a, b);
+}
+
+static uint32_t
+acl_build_index(const struct rte_acl_config *config, uint32_t *data_index)
+{
+	uint32_t n, m;
+	int32_t last_header;
+
+	m = 0;
+	last_header = -1;
+
+	for (n = 0; n < config->num_fields; n++) {
+		if (last_header != config->defs[n].input_index) {
+			last_header = config->defs[n].input_index;
+			data_index[m++] = config->defs[n].offset;
+		}
+	}
+
+	return m;
+}
+
+static struct rte_acl_build_rule *
+build_one_trie(struct acl_build_context *context,
+	struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES],
+	uint32_t n, int32_t node_max)
+{
+	struct rte_acl_build_rule *last;
+	struct rte_acl_config *config;
+
+	config = rule_sets[n]->config;
+
+	acl_rule_stats(rule_sets[n], config);
+	rule_sets[n] = sort_rules(rule_sets[n]);
+
+	context->tries[n].type = RTE_ACL_FULL_TRIE;
+	context->tries[n].count = 0;
+
+	context->tries[n].num_data_indexes = acl_build_index(config,
+		context->data_indexes[n]);
+	context->tries[n].data_index = context->data_indexes[n];
+
+	context->cur_node_max = node_max;
+
+	context->bld_tries[n].trie = build_trie(context, rule_sets[n],
+		&last, &context->tries[n].count);
+
+	return last;
+}
+
+static int
+acl_build_tries(struct acl_build_context *context,
+	struct rte_acl_build_rule *head)
+{
+	uint32_t n, num_tries;
+	struct rte_acl_config *config;
+	struct rte_acl_build_rule *last;
+	struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES];
+
+	config = head->config;
+	rule_sets[0] = head;
+
+	/* initialize tries */
+	for (n = 0; n < RTE_DIM(context->tries); n++) {
+		context->tries[n].type = RTE_ACL_UNUSED_TRIE;
+		context->bld_tries[n].trie = NULL;
+		context->tries[n].count = 0;
+	}
+
+	context->tries[0].type = RTE_ACL_FULL_TRIE;
+
+	/* calc wildness of each field of each rule */
+	acl_calc_wildness(head, config);
+
+	for (n = 0;; n = num_tries) {
+
+		num_tries = n + 1;
+
+		last = build_one_trie(context, rule_sets, n, context->node_max);
+		if (context->bld_tries[n].trie == NULL) {
+			RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
+			return -ENOMEM;
+		}
+
+		/* Build of the last trie completed. */
+		if (last == NULL)
+			break;
+
+		if (num_tries == RTE_DIM(context->tries)) {
+			RTE_LOG(ERR, ACL,
+				"Exceeded max number of tries: %u\n",
+				num_tries);
+			return -ENOMEM;
+		}
+
+		/* Trie is getting too big, split remaining rule set. */
+		rule_sets[num_tries] = last->next;
+		last->next = NULL;
+		acl_free_node(context, context->bld_tries[n].trie);
+
+		/* Create a new copy of config for remaining rules. */
+		config = acl_build_alloc(context, 1, sizeof(*config));
+		memcpy(config, rule_sets[n]->config, sizeof(*config));
+
+		/* Make remaining rules use new config. */
+		for (head = rule_sets[num_tries]; head != NULL;
+				head = head->next)
+			head->config = config;
+
+		/*
+		 * Rebuild the trie for the reduced rule-set.
+		 * Don't try to split it any further.
+		 */
+		last = build_one_trie(context, rule_sets, n, INT32_MAX);
+		if (context->bld_tries[n].trie == NULL || last != NULL) {
+			RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
+			return -ENOMEM;
+		}
+
+	}
+
+	context->num_tries = num_tries;
+	return 0;
+}
+
+static void
+acl_build_log(const struct acl_build_context *ctx)
+{
+	uint32_t n;
+
+	RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+		"node limit for tree split: %u\n"
+		"nodes created: %u\n"
+		"memory consumed: %zu\n",
+		ctx->acx->name,
+		ctx->node_max,
+		ctx->num_nodes,
+		ctx->pool.alloc);
+
+	for (n = 0; n < RTE_DIM(ctx->tries); n++) {
+		if (ctx->tries[n].count != 0)
+			RTE_LOG(DEBUG, ACL,
+				"trie %u: number of rules: %u, indexes: %u\n",
+				n, ctx->tries[n].count,
+				ctx->tries[n].num_data_indexes);
+	}
+}
+
+static int
+acl_build_rules(struct acl_build_context *bcx)
+{
+	struct rte_acl_build_rule *br, *head;
+	const struct rte_acl_rule *rule;
+	uint32_t *wp;
+	uint32_t fn, i, n, num;
+	size_t ofs, sz;
+
+	fn = bcx->cfg.num_fields;
+	n = bcx->acx->num_rules;
+	ofs = n * sizeof(*br);
+	sz = ofs + n * fn * sizeof(*wp);
+
+	br = tb_alloc(&bcx->pool, sz);
+
+	wp = (uint32_t *)((uintptr_t)br + ofs);
+	num = 0;
+	head = NULL;
+
+	for (i = 0; i != n; i++) {
+		rule = (const struct rte_acl_rule *)
+			((uintptr_t)bcx->acx->rules + bcx->acx->rule_sz * i);
+		if ((rule->data.category_mask & bcx->category_mask) != 0) {
+			br[num].next = head;
+			br[num].config = &bcx->cfg;
+			br[num].f = rule;
+			br[num].wildness = wp;
+			wp += fn;
+			head = br + num;
+			num++;
+		}
+	}
+
+	bcx->num_rules = num;
+	bcx->build_rules = head;
+
+	return 0;
+}
+
+/*
+ * Copy data_indexes for each trie into RT location.
+ */
+static void
+acl_set_data_indexes(struct rte_acl_ctx *ctx)
+{
+	uint32_t i, n, ofs;
+
+	ofs = 0;
+	for (i = 0; i != ctx->num_tries; i++) {
+		n = ctx->trie[i].num_data_indexes;
+		memcpy(ctx->data_indexes + ofs, ctx->trie[i].data_index,
+			n * sizeof(ctx->data_indexes[0]));
+		ctx->trie[i].data_index = ctx->data_indexes + ofs;
+		ofs += RTE_ACL_MAX_FIELDS;
+	}
+}
+
+/*
+ * Internal routine, performs 'build' phase of trie generation:
+ * - setups build context.
+ * - analizes given set of rules.
+ * - builds internal tree(s).
+ */
+static int
+acl_bld(struct acl_build_context *bcx, struct rte_acl_ctx *ctx,
+	const struct rte_acl_config *cfg, uint32_t node_max)
+{
+	int32_t rc;
+
+	/* setup build context. */
+	memset(bcx, 0, sizeof(*bcx));
+	bcx->acx = ctx;
+	bcx->pool.alignment = ACL_POOL_ALIGN;
+	bcx->pool.min_alloc = ACL_POOL_ALLOC_MIN;
+	bcx->cfg = *cfg;
+	bcx->category_mask = RTE_LEN2MASK(bcx->cfg.num_categories,
+		typeof(bcx->category_mask));
+	bcx->node_max = node_max;
+
+	rc = sigsetjmp(bcx->pool.fail, 0);
+
+	/* build phase runs out of memory. */
+	if (rc != 0) {
+		RTE_LOG(ERR, ACL,
+			"ACL context: %s, %s() failed with error code: %d\n",
+			bcx->acx->name, __func__, rc);
+		return rc;
+	}
+
+	/* Create a build rules copy. */
+	rc = acl_build_rules(bcx);
+	if (rc != 0)
+		return rc;
+
+	/* No rules to build for that context+config */
+	if (bcx->build_rules == NULL) {
+		rc = -EINVAL;
+	} else {
+		/* build internal trie representation. */
+		rc = acl_build_tries(bcx, bcx->build_rules);
+	}
+	return rc;
+}
+
+/*
+ * Check that parameters for acl_build() are valid.
+ */
+static int
+acl_check_bld_param(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
+{
+	static const size_t field_sizes[] = {
+		sizeof(uint8_t), sizeof(uint16_t),
+		sizeof(uint32_t), sizeof(uint64_t),
+	};
+
+	uint32_t i, j;
+
+	if (ctx == NULL || cfg == NULL || cfg->num_categories == 0 ||
+			cfg->num_categories > RTE_ACL_MAX_CATEGORIES ||
+			cfg->num_fields == 0 ||
+			cfg->num_fields > RTE_ACL_MAX_FIELDS)
+		return -EINVAL;
+
+	for (i = 0; i != cfg->num_fields; i++) {
+		if (cfg->defs[i].type > RTE_ACL_FIELD_TYPE_BITMASK) {
+			RTE_LOG(ERR, ACL,
+			"ACL context: %s, invalid type: %hhu for %u-th field\n",
+			ctx->name, cfg->defs[i].type, i);
+			return -EINVAL;
+		}
+		for (j = 0;
+				j != RTE_DIM(field_sizes) &&
+				cfg->defs[i].size != field_sizes[j];
+				j++)
+			;
+
+		if (j == RTE_DIM(field_sizes)) {
+			RTE_LOG(ERR, ACL,
+			"ACL context: %s, invalid size: %hhu for %u-th field\n",
+			ctx->name, cfg->defs[i].size, i);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int
+rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
+{
+	int32_t rc;
+	uint32_t n;
+	size_t max_size;
+	struct acl_build_context bcx;
+
+	rc = acl_check_bld_param(ctx, cfg);
+	if (rc != 0)
+		return rc;
+
+	acl_build_reset(ctx);
+
+	if (cfg->max_size == 0) {
+		n = NODE_MIN;
+		max_size = SIZE_MAX;
+	} else {
+		n = NODE_MAX;
+		max_size = cfg->max_size;
+	}
+
+	for (rc = -ERANGE; n >= NODE_MIN && rc == -ERANGE; n /= 2) {
+
+		/* perform build phase. */
+		rc = acl_bld(&bcx, ctx, cfg, n);
+
+		if (rc == 0) {
+			/* allocate and fill run-time  structures. */
+			rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries,
+				bcx.num_tries, bcx.cfg.num_categories,
+				RTE_ACL_MAX_FIELDS * RTE_DIM(bcx.tries) *
+				sizeof(ctx->data_indexes[0]), max_size);
+			if (rc == 0) {
+				/* set data indexes. */
+				acl_set_data_indexes(ctx);
+
+				/* copy in build config. */
+				ctx->config = *cfg;
+			}
+		}
+
+		acl_build_log(&bcx);
+
+		/* cleanup after build. */
+		tb_free_pool(&bcx.pool);
+	}
+
+	return rc;
+}
diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c
new file mode 100644
index 00000000..ea557ab9
--- /dev/null
+++ b/lib/librte_acl/acl_gen.c
@@ -0,0 +1,561 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "acl.h"
+
+#define	QRANGE_MIN	((uint8_t)INT8_MIN)
+
+#define	RTE_ACL_VERIFY(exp)	do {                                          \
+	if (!(exp))                                                           \
+		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
+
+struct acl_node_counters {
+	int32_t match;
+	int32_t match_used;
+	int32_t single;
+	int32_t quad;
+	int32_t quad_vectors;
+	int32_t dfa;
+	int32_t dfa_gr64;
+};
+
+struct rte_acl_indices {
+	int32_t dfa_index;
+	int32_t quad_index;
+	int32_t single_index;
+	int32_t match_index;
+	int32_t match_start;
+};
+
+static void
+acl_gen_log_stats(const struct rte_acl_ctx *ctx,
+	const struct acl_node_counters *counts,
+	const struct rte_acl_indices *indices,
+	size_t max_size)
+{
+	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
+		"runtime memory footprint on socket %d:\n"
+		"single nodes/bytes used: %d/%zu\n"
+		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
+		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
+		"match nodes/bytes used: %d/%zu\n"
+		"total: %zu bytes\n"
+		"max limit: %zu bytes\n",
+		ctx->name, ctx->socket_id,
+		counts->single, counts->single * sizeof(uint64_t),
+		counts->quad, counts->quad_vectors,
+		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
+		counts->dfa, counts->dfa_gr64,
+		indices->dfa_index * sizeof(uint64_t),
+		counts->match,
+		counts->match * sizeof(struct rte_acl_match_results),
+		ctx->mem_sz,
+		max_size);
+}
+
+static uint64_t
+acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
+{
+	uint64_t idx;
+	uint32_t i;
+
+	idx = 0;
+	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
+		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
+		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
+		idx |= (i - node->dfa_gr64[i]) <<
+			(6 + RTE_ACL_DFA_GR64_BIT * i);
+	}
+
+	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
+}
+
+static void
+acl_dfa_fill_gr64(const struct rte_acl_node *node,
+	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
+{
+	uint32_t i;
+
+	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
+		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
+			src + i * RTE_ACL_DFA_GR64_SIZE,
+			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
+	}
+}
+
+static uint32_t
+acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
+	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
+{
+	uint32_t i, j, k;
+
+	k = 0;
+	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
+		gr64[i] = i;
+		for (j = 0; j != i; j++) {
+			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
+					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
+					RTE_ACL_DFA_GR64_SIZE *
+					sizeof(array_ptr[0])) == 0)
+				break;
+		}
+		gr64[i] = (j != i) ? gr64[j] : k++;
+	}
+
+	return k;
+}
+
+static uint32_t
+acl_node_fill_dfa(const struct rte_acl_node *node,
+	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
+{
+	uint32_t n, x;
+	uint32_t ranges, last_bit;
+	struct rte_acl_node *child;
+	struct rte_acl_bitset *bits;
+
+	ranges = 0;
+	last_bit = 0;
+
+	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
+		dfa[n] = no_match;
+
+	for (x = 0; x < node->num_ptrs; x++) {
+
+		child = node->ptrs[x].ptr;
+		if (child == NULL)
+			continue;
+
+		bits = &node->ptrs[x].values;
+		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {
+
+			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
+				(1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {
+
+				dfa[n] = resolved ? child->node_index : x;
+				ranges += (last_bit == 0);
+				last_bit = 1;
+			} else {
+				last_bit = 0;
+			}
+		}
+	}
+
+	return ranges;
+}
+
+/*
+*  Counts the number of groups of sequential bits that are
+*  either 0 or 1, as specified by the zero_one parameter. This is used to
+*  calculate the number of ranges in a node to see if it fits in a quad range
+*  node.
+*/
+static int
+acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
+{
+	int n, ranges, last_bit;
+
+	ranges = 0;
+	last_bit = zero_one ^ 1;
+
+	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
+		if (bits->bits[n / (sizeof(bits_t) * 8)] &
+				(1 << (n % (sizeof(bits_t) * 8)))) {
+			if (zero_one == 1 && last_bit != 1)
+				ranges++;
+			last_bit = 1;
+		} else {
+			if (zero_one == 0 && last_bit != 0)
+				ranges++;
+			last_bit = 0;
+		}
+	}
+	for (n = 0; n < QRANGE_MIN; n++) {
+		if (bits->bits[n / (sizeof(bits_t) * 8)] &
+				(1 << (n % (sizeof(bits_t) * 8)))) {
+			if (zero_one == 1 && last_bit != 1)
+				ranges++;
+			last_bit = 1;
+		} else {
+			if (zero_one == 0 && last_bit != 0)
+				ranges++;
+			last_bit = 0;
+		}
+	}
+
+	return ranges;
+}
+
+/*
+ * Count number of ranges spanned by the node's pointers
+ */
+static int
+acl_count_fanout(struct rte_acl_node *node)
+{
+	uint32_t n;
+	int ranges;
+
+	if (node->fanout != 0)
+		return node->fanout;
+
+	ranges = acl_count_sequential_groups(&node->values, 0);
+
+	for (n = 0; n < node->num_ptrs; n++) {
+		if (node->ptrs[n].ptr != NULL)
+			ranges += acl_count_sequential_groups(
+				&node->ptrs[n].values, 1);
+	}
+
+	node->fanout = ranges;
+	return node->fanout;
+}
+
+/*
+ * Determine the type of nodes and count each type
+ */
+static void
+acl_count_trie_types(struct acl_node_counters *counts,
+	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
+{
+	uint32_t n;
+	int num_ptrs;
+	uint64_t dfa[RTE_ACL_DFA_SIZE];
+
+	/* skip if this node has been counted */
+	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
+		return;
+
+	if (node->match_flag != 0 || node->num_ptrs == 0) {
+		counts->match++;
+		node->node_type = RTE_ACL_NODE_MATCH;
+		return;
+	}
+
+	num_ptrs = acl_count_fanout(node);
+
+	/* Force type to dfa */
+	if (force_dfa)
+		num_ptrs = RTE_ACL_DFA_SIZE;
+
+	/* determine node type based on number of ranges */
+	if (num_ptrs == 1) {
+		counts->single++;
+		node->node_type = RTE_ACL_NODE_SINGLE;
+	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
+		counts->quad++;
+		counts->quad_vectors += node->fanout;
+		node->node_type = RTE_ACL_NODE_QRANGE;
+	} else {
+		counts->dfa++;
+		node->node_type = RTE_ACL_NODE_DFA;
+		if (force_dfa != 0) {
+			/* always expand to a max number of nodes. */
+			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
+				node->dfa_gr64[n] = n;
+			node->fanout = n;
+		} else {
+			acl_node_fill_dfa(node, dfa, no_match, 0);
+			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
+		}
+		counts->dfa_gr64 += node->fanout;
+	}
+
+	/*
+	 * recursively count the types of all children
+	 */
+	for (n = 0; n < node->num_ptrs; n++) {
+		if (node->ptrs[n].ptr != NULL)
+			acl_count_trie_types(counts, node->ptrs[n].ptr,
+				no_match, 0);
+	}
+}
+
+static void
+acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
+	int resolved)
+{
+	uint32_t x;
+	int32_t m;
+	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];
+
+	acl_node_fill_dfa(node, dfa, no_match, resolved);
+
+	/*
+	 * Rather than going from 0 to 256, the range count and
+	 * the layout are from 80-ff then 0-7f due to signed compare
+	 * for SSE (cmpgt).
+	 */
+	if (node->node_type == RTE_ACL_NODE_QRANGE) {
+
+		m = 0;
+		node_a = node_array;
+		index = dfa[QRANGE_MIN];
+		*node_a++ = index;
+
+		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
+			if (dfa[x] != index) {
+				index = dfa[x];
+				*node_a++ = index;
+				node->transitions[m++] = (uint8_t)(x - 1);
+			}
+		}
+
+		for (x = 0; x < INT8_MAX + 1; x++) {
+			if (dfa[x] != index) {
+				index = dfa[x];
+				*node_a++ = index;
+				node->transitions[m++] = (uint8_t)(x - 1);
+			}
+		}
+
+		/* fill unused locations with max value - nothing is greater */
+		for (; m < RTE_ACL_QUAD_SIZE; m++)
+			node->transitions[m] = INT8_MAX;
+
+		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);
+
+	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
+		acl_dfa_fill_gr64(node, dfa, node_array);
+	}
+}
+
+/*
+ * Routine that allocates space for this node and recursively calls
+ * to allocate space for each child. Once all the children are allocated,
+ * then resolve all transitions for this node.
+ */
+static void
+acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
+	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
+{
+	uint32_t n, sz, *qtrp;
+	uint64_t *array_ptr;
+	struct rte_acl_match_results *match;
+
+	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
+		return;
+
+	array_ptr = NULL;
+
+	switch (node->node_type) {
+	case RTE_ACL_NODE_DFA:
+		array_ptr = &node_array[index->dfa_index];
+		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
+		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
+		index->dfa_index += sz;
+		for (n = 0; n < sz; n++)
+			array_ptr[n] = no_match;
+		break;
+	case RTE_ACL_NODE_SINGLE:
+		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
+			node->node_type;
+		array_ptr = &node_array[index->single_index];
+		index->single_index += 1;
+		array_ptr[0] = no_match;
+		break;
+	case RTE_ACL_NODE_QRANGE:
+		array_ptr = &node_array[index->quad_index];
+		acl_add_ptrs(node, array_ptr, no_match, 0);
+		qtrp = (uint32_t *)node->transitions;
+		node->node_index = qtrp[0];
+		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
+		node->node_index |= index->quad_index | node->node_type;
+		index->quad_index += node->fanout;
+		break;
+	case RTE_ACL_NODE_MATCH:
+		match = ((struct rte_acl_match_results *)
+			(node_array + index->match_start));
+		for (n = 0; n != RTE_DIM(match->results); n++)
+			RTE_ACL_VERIFY(match->results[0] == 0);
+		memcpy(match + index->match_index, node->mrt,
+			sizeof(*node->mrt));
+		node->node_index = index->match_index | node->node_type;
+		index->match_index += 1;
+		break;
+	case RTE_ACL_NODE_UNDEFINED:
+		RTE_ACL_VERIFY(node->node_type !=
+			(uint32_t)RTE_ACL_NODE_UNDEFINED);
+		break;
+	}
+
+	/* recursively allocate space for all children */
+	for (n = 0; n < node->num_ptrs; n++) {
+		if (node->ptrs[n].ptr != NULL)
+			acl_gen_node(node->ptrs[n].ptr,
+				node_array,
+				no_match,
+				index,
+				num_categories);
+	}
+
+	/* All children are resolved, resolve this node's pointers */
+	switch (node->node_type) {
+	case RTE_ACL_NODE_DFA:
+		acl_add_ptrs(node, array_ptr, no_match, 1);
+		break;
+	case RTE_ACL_NODE_SINGLE:
+		for (n = 0; n < node->num_ptrs; n++) {
+			if (node->ptrs[n].ptr != NULL)
+				array_ptr[0] = node->ptrs[n].ptr->node_index;
+		}
+		break;
+	case RTE_ACL_NODE_QRANGE:
+		acl_add_ptrs(node, array_ptr, no_match, 1);
+		break;
+	case RTE_ACL_NODE_MATCH:
+		break;
+	case RTE_ACL_NODE_UNDEFINED:
+		RTE_ACL_VERIFY(node->node_type !=
+			(uint32_t)RTE_ACL_NODE_UNDEFINED);
+		break;
+	}
+}
+
+static void
+acl_calc_counts_indices(struct acl_node_counters *counts,
+	struct rte_acl_indices *indices,
+	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+	uint64_t no_match)
+{
+	uint32_t n;
+
+	memset(indices, 0, sizeof(*indices));
+	memset(counts, 0, sizeof(*counts));
+
+	/* Get stats on nodes */
+	for (n = 0; n < num_tries; n++) {
+		acl_count_trie_types(counts, node_bld_trie[n].trie,
+			no_match, 1);
+	}
+
+	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
+	indices->quad_index = indices->dfa_index +
+		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
+	indices->single_index = indices->quad_index + counts->quad_vectors;
+	indices->match_start = indices->single_index + counts->single + 1;
+	indices->match_start = RTE_ALIGN(indices->match_start,
+		(XMM_SIZE / sizeof(uint64_t)));
+	indices->match_index = 1;
+}
+
+/*
+ * Generate the runtime structure using build structure
+ */
+int
+rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
+{
+	void *mem;
+	size_t total_size;
+	uint64_t *node_array, no_match;
+	uint32_t n, match_index;
+	struct rte_acl_match_results *match;
+	struct acl_node_counters counts;
+	struct rte_acl_indices indices;
+
+	no_match = RTE_ACL_NODE_MATCH;
+
+	/* Fill counts and indices arrays from the nodes. */
+	acl_calc_counts_indices(&counts, &indices,
+		node_bld_trie, num_tries, no_match);
+
+	/* Allocate runtime memory (align to cache boundary) */
+	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
+		indices.match_start * sizeof(uint64_t) +
+		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
+		XMM_SIZE;
+
+	if (total_size > max_size) {
+		RTE_LOG(DEBUG, ACL,
+			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
+			"bytes required: %zu, allowed: %zu\n",
+			ctx->name, total_size, max_size);
+		return -ERANGE;
+	}
+
+	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
+			ctx->socket_id);
+	if (mem == NULL) {
+		RTE_LOG(ERR, ACL,
+			"allocation of %zu bytes on socket %d for %s failed\n",
+			total_size, ctx->socket_id, ctx->name);
+		return -ENOMEM;
+	}
+
+	/* Fill the runtime structure */
+	match_index = indices.match_start;
+	node_array = (uint64_t *)((uintptr_t)mem +
+		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));
+
+	/*
+	 * Setup the NOMATCH node (a SINGLE at the
+	 * highest index, that points to itself)
+	 */
+
+	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;
+
+	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
+		node_array[n] = no_match;
+
+	/* NOMATCH result at index 0 */
+	match = ((struct rte_acl_match_results *)(node_array + match_index));
+	memset(match, 0, sizeof(*match));
+
+	for (n = 0; n < num_tries; n++) {
+
+		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
+			&indices, num_categories);
+
+		if (node_bld_trie[n].trie->node_index == no_match)
+			trie[n].root_index = 0;
+		else
+			trie[n].root_index = node_bld_trie[n].trie->node_index;
+	}
+
+	ctx->mem = mem;
+	ctx->mem_sz = total_size;
+	ctx->data_indexes = mem;
+	ctx->num_tries = num_tries;
+	ctx->num_categories = num_categories;
+	ctx->match_index = match_index;
+	ctx->no_match = no_match;
+	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
+	ctx->trans_table = node_array;
+	memcpy(ctx->trie, trie, sizeof(ctx->trie));
+
+	acl_gen_log_stats(ctx, &counts, &indices, max_size);
+	return 0;
+}
diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h
new file mode 100644
index 00000000..b2fc42c6
--- /dev/null
+++ b/lib/librte_acl/acl_run.h
@@ -0,0 +1,263 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_ACL_RUN_H_
+#define	_ACL_RUN_H_
+
+#include <rte_acl.h>
+#include "acl.h"
+
+#define MAX_SEARCHES_AVX16	16
+#define MAX_SEARCHES_SSE8	8
+#define MAX_SEARCHES_SSE4	4
+#define MAX_SEARCHES_SCALAR	2
+
+#define GET_NEXT_4BYTES(prm, idx)	\
+	(*((const int32_t *)((prm)[(idx)].data + *(prm)[idx].data_index++)))
+
+
+#define RTE_ACL_NODE_INDEX	((uint32_t)~RTE_ACL_NODE_TYPE)
+
+#define	SCALAR_QRANGE_MULT	0x01010101
+#define	SCALAR_QRANGE_MASK	0x7f7f7f7f
+#define	SCALAR_QRANGE_MIN	0x80808080
+
+/*
+ * Structure to manage N parallel trie traversals.
+ * The runtime trie traversal routines can process 8, 4, or 2 tries
+ * in parallel. Each packet may require multiple trie traversals (up to 4).
+ * This structure is used to fill the slots (0 to n-1) for parallel processing
+ * with the trie traversals needed for each packet.
+ */
+struct acl_flow_data {
+	uint32_t            num_packets;
+	/* number of packets processed */
+	uint32_t            started;
+	/* number of trie traversals in progress */
+	uint32_t            trie;
+	/* current trie index (0 to N-1) */
+	uint32_t            cmplt_size;
+	uint32_t            total_packets;
+	uint32_t            categories;
+	/* number of result categories per packet. */
+	/* maximum number of packets to process */
+	const uint64_t     *trans;
+	const uint8_t     **data;
+	uint32_t           *results;
+	struct completion  *last_cmplt;
+	struct completion  *cmplt_array;
+};
+
+/*
+ * Structure to maintain running results for
+ * a single packet (up to 4 tries).
+ */
+struct completion {
+	uint32_t *results;                          /* running results. */
+	int32_t   priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */
+	uint32_t  count;                            /* num of remaining tries */
+	/* true for allocated struct */
+} __attribute__((aligned(XMM_SIZE)));
+
+/*
+ * One parms structure for each slot in the search engine.
+ */
+struct parms {
+	const uint8_t              *data;
+	/* input data for this packet */
+	const uint32_t             *data_index;
+	/* data indirection for this trie */
+	struct completion          *cmplt;
+	/* completion data for this packet */
+};
+
+/*
+ * Define an global idle node for unused engine slots
+ */
+static const uint32_t idle[UINT8_MAX + 1];
+
+/*
+ * Allocate a completion structure to manage the tries for a packet.
+ */
+static inline struct completion *
+alloc_completion(struct completion *p, uint32_t size, uint32_t tries,
+	uint32_t *results)
+{
+	uint32_t n;
+
+	for (n = 0; n < size; n++) {
+
+		if (p[n].count == 0) {
+
+			/* mark as allocated and set number of tries. */
+			p[n].count = tries;
+			p[n].results = results;
+			return &(p[n]);
+		}
+	}
+
+	/* should never get here */
+	return NULL;
+}
+
+/*
+ * Resolve priority for a single result trie.
+ */
+static inline void
+resolve_single_priority(uint64_t transition, int n,
+	const struct rte_acl_ctx *ctx, struct parms *parms,
+	const struct rte_acl_match_results *p)
+{
+	if (parms[n].cmplt->count == ctx->num_tries ||
+			parms[n].cmplt->priority[0] <=
+			p[transition].priority[0]) {
+
+		parms[n].cmplt->priority[0] = p[transition].priority[0];
+		parms[n].cmplt->results[0] = p[transition].results[0];
+	}
+}
+
+/*
+ * Routine to fill a slot in the parallel trie traversal array (parms) from
+ * the list of packets (flows).
+ */
+static inline uint64_t
+acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n,
+	const struct rte_acl_ctx *ctx)
+{
+	uint64_t transition;
+
+	/* if there are any more packets to process */
+	if (flows->num_packets < flows->total_packets) {
+		parms[n].data = flows->data[flows->num_packets];
+		parms[n].data_index = ctx->trie[flows->trie].data_index;
+
+		/* if this is the first trie for this packet */
+		if (flows->trie == 0) {
+			flows->last_cmplt = alloc_completion(flows->cmplt_array,
+				flows->cmplt_size, ctx->num_tries,
+				flows->results +
+				flows->num_packets * flows->categories);
+		}
+
+		/* set completion parameters and starting index for this slot */
+		parms[n].cmplt = flows->last_cmplt;
+		transition =
+			flows->trans[parms[n].data[*parms[n].data_index++] +
+			ctx->trie[flows->trie].root_index];
+
+		/*
+		 * if this is the last trie for this packet,
+		 * then setup next packet.
+		 */
+		flows->trie++;
+		if (flows->trie >= ctx->num_tries) {
+			flows->trie = 0;
+			flows->num_packets++;
+		}
+
+		/* keep track of number of active trie traversals */
+		flows->started++;
+
+	/* no more tries to process, set slot to an idle position */
+	} else {
+		transition = ctx->idle;
+		parms[n].data = (const uint8_t *)idle;
+		parms[n].data_index = idle;
+	}
+	return transition;
+}
+
+static inline void
+acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt,
+	uint32_t cmplt_size, const uint8_t **data, uint32_t *results,
+	uint32_t data_num, uint32_t categories, const uint64_t *trans)
+{
+	flows->num_packets = 0;
+	flows->started = 0;
+	flows->trie = 0;
+	flows->last_cmplt = NULL;
+	flows->cmplt_array = cmplt;
+	flows->total_packets = data_num;
+	flows->categories = categories;
+	flows->cmplt_size = cmplt_size;
+	flows->data = data;
+	flows->results = results;
+	flows->trans = trans;
+}
+
+typedef void (*resolve_priority_t)
+(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+	struct parms *parms, const struct rte_acl_match_results *p,
+	uint32_t categories);
+
+/*
+ * Detect matches. If a match node transition is found, then this trie
+ * traversal is complete and fill the slot with the next trie
+ * to be processed.
+ */
+static inline uint64_t
+acl_match_check(uint64_t transition, int slot,
+	const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, resolve_priority_t resolve_priority)
+{
+	const struct rte_acl_match_results *p;
+
+	p = (const struct rte_acl_match_results *)
+		(flows->trans + ctx->match_index);
+
+	if (transition & RTE_ACL_NODE_MATCH) {
+
+		/* Remove flags from index and decrement active traversals */
+		transition &= RTE_ACL_NODE_INDEX;
+		flows->started--;
+
+		/* Resolve priorities for this trie and running results */
+		if (flows->categories == 1)
+			resolve_single_priority(transition, slot, ctx,
+				parms, p);
+		else
+			resolve_priority(transition, slot, ctx, parms,
+				p, flows->categories);
+
+		/* Count down completed tries for this search request */
+		parms[slot].cmplt->count--;
+
+		/* Fill the slot with the next trie or idle trie */
+		transition = acl_start_next_trie(flows, parms, slot, ctx);
+	}
+
+	return transition;
+}
+
+#endif /* _ACL_RUN_H_ */
diff --git a/lib/librte_acl/acl_run_avx2.c b/lib/librte_acl/acl_run_avx2.c
new file mode 100644
index 00000000..79ebbd6c
--- /dev/null
+++ b/lib/librte_acl/acl_run_avx2.c
@@ -0,0 +1,54 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "acl_run_avx2.h"
+
+/*
+ * Note, that to be able to use AVX2 classify method,
+ * both compiler and target cpu have to support AVX2 instructions.
+ */
+int
+rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	if (likely(num >= MAX_SEARCHES_AVX16))
+		return search_avx2x16(ctx, data, results, num, categories);
+	else if (num >= MAX_SEARCHES_SSE8)
+		return search_sse_8(ctx, data, results, num, categories);
+	else if (num >= MAX_SEARCHES_SSE4)
+		return search_sse_4(ctx, data, results, num, categories);
+	else
+		return rte_acl_classify_scalar(ctx, data, results, num,
+			categories);
+}
diff --git a/lib/librte_acl/acl_run_avx2.h b/lib/librte_acl/acl_run_avx2.h
new file mode 100644
index 00000000..b01a46a5
--- /dev/null
+++ b/lib/librte_acl/acl_run_avx2.h
@@ -0,0 +1,284 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run_sse.h"
+
+static const rte_ymm_t ymm_match_mask = {
+	.u32 = {
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+	},
+};
+
+static const rte_ymm_t ymm_index_mask = {
+	.u32 = {
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+	},
+};
+
+static const rte_ymm_t ymm_shuffle_input = {
+	.u32 = {
+		0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
+		0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
+	},
+};
+
+static const rte_ymm_t ymm_ones_16 = {
+	.u16 = {
+		1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1,
+	},
+};
+
+static const rte_ymm_t ymm_range_base = {
+	.u32 = {
+		0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+		0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+	},
+};
+
+/*
+ * Process 8 transitions in parallel.
+ * tr_lo contains low 32 bits for 8 transition.
+ * tr_hi contains high 32 bits for 8 transition.
+ * next_input contains up to 4 input bytes for 8 flows.
+ */
+static inline __attribute__((always_inline)) ymm_t
+transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi)
+{
+	const int32_t *tr;
+	ymm_t addr;
+
+	tr = (const int32_t *)(uintptr_t)trans;
+
+	/* Calculate the address (array index) for all 8 transitions. */
+	ACL_TR_CALC_ADDR(mm256, 256, addr, ymm_index_mask.y, next_input,
+		ymm_shuffle_input.y, ymm_ones_16.y, ymm_range_base.y,
+		*tr_lo, *tr_hi);
+
+	/* load lower 32 bits of 8 transactions at once. */
+	*tr_lo = _mm256_i32gather_epi32(tr, addr, sizeof(trans[0]));
+
+	next_input = _mm256_srli_epi32(next_input, CHAR_BIT);
+
+	/* load high 32 bits of 8 transactions at once. */
+	*tr_hi = _mm256_i32gather_epi32(tr + 1, addr, sizeof(trans[0]));
+
+	return next_input;
+}
+
+/*
+ * Process matches for  8 flows.
+ * tr_lo contains low 32 bits for 8 transition.
+ * tr_hi contains high 32 bits for 8 transition.
+ */
+static inline void
+acl_process_matches_avx2x8(const struct rte_acl_ctx *ctx,
+	struct parms *parms, struct acl_flow_data *flows, uint32_t slot,
+	ymm_t matches, ymm_t *tr_lo, ymm_t *tr_hi)
+{
+	ymm_t t0, t1;
+	ymm_t lo, hi;
+	xmm_t l0, l1;
+	uint32_t i;
+	uint64_t tr[MAX_SEARCHES_SSE8];
+
+	l1 = _mm256_extracti128_si256(*tr_lo, 1);
+	l0 = _mm256_castsi256_si128(*tr_lo);
+
+	for (i = 0; i != RTE_DIM(tr) / 2; i++) {
+
+		/*
+		 * Extract low 32bits of each transition.
+		 * That's enough to process the match.
+		 */
+		tr[i] = (uint32_t)_mm_cvtsi128_si32(l0);
+		tr[i + 4] = (uint32_t)_mm_cvtsi128_si32(l1);
+
+		l0 = _mm_srli_si128(l0, sizeof(uint32_t));
+		l1 = _mm_srli_si128(l1, sizeof(uint32_t));
+
+		tr[i] = acl_match_check(tr[i], slot + i,
+			ctx, parms, flows, resolve_priority_sse);
+		tr[i + 4] = acl_match_check(tr[i + 4], slot + i + 4,
+			ctx, parms, flows, resolve_priority_sse);
+	}
+
+	/* Collect new transitions into 2 YMM registers. */
+	t0 = _mm256_set_epi64x(tr[5], tr[4], tr[1], tr[0]);
+	t1 = _mm256_set_epi64x(tr[7], tr[6], tr[3], tr[2]);
+
+	/* For each transition: put low 32 into tr_lo and high 32 into tr_hi */
+	ACL_TR_HILO(mm256, __m256, t0, t1, lo, hi);
+
+	/* Keep transitions wth NOMATCH intact. */
+	*tr_lo = _mm256_blendv_epi8(*tr_lo, lo, matches);
+	*tr_hi = _mm256_blendv_epi8(*tr_hi, hi, matches);
+}
+
+static inline void
+acl_match_check_avx2x8(const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, uint32_t slot,
+	ymm_t *tr_lo, ymm_t *tr_hi, ymm_t match_mask)
+{
+	uint32_t msk;
+	ymm_t matches, temp;
+
+	/* test for match node */
+	temp = _mm256_and_si256(match_mask, *tr_lo);
+	matches = _mm256_cmpeq_epi32(temp, match_mask);
+	msk = _mm256_movemask_epi8(matches);
+
+	while (msk != 0) {
+
+		acl_process_matches_avx2x8(ctx, parms, flows, slot,
+			matches, tr_lo, tr_hi);
+		temp = _mm256_and_si256(match_mask, *tr_lo);
+		matches = _mm256_cmpeq_epi32(temp, match_mask);
+		msk = _mm256_movemask_epi8(matches);
+	}
+}
+
+/*
+ * Execute trie traversal for up to 16 flows in parallel.
+ */
+static inline int
+search_avx2x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	uint32_t n;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_AVX16];
+	struct completion cmplt[MAX_SEARCHES_AVX16];
+	struct parms parms[MAX_SEARCHES_AVX16];
+	ymm_t input[2], tr_lo[2], tr_hi[2];
+	ymm_t t0, t1;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < RTE_DIM(cmplt); n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	t0 = _mm256_set_epi64x(index_array[5], index_array[4],
+		index_array[1], index_array[0]);
+	t1 = _mm256_set_epi64x(index_array[7], index_array[6],
+		index_array[3], index_array[2]);
+
+	ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[0], tr_hi[0]);
+
+	t0 = _mm256_set_epi64x(index_array[13], index_array[12],
+		index_array[9], index_array[8]);
+	t1 = _mm256_set_epi64x(index_array[15], index_array[14],
+		index_array[11], index_array[10]);
+
+	ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[1], tr_hi[1]);
+
+	 /* Check for any matches. */
+	acl_match_check_avx2x8(ctx, parms, &flows, 0, &tr_lo[0], &tr_hi[0],
+		ymm_match_mask.y);
+	acl_match_check_avx2x8(ctx, parms, &flows, 8, &tr_lo[1], &tr_hi[1],
+		ymm_match_mask.y);
+
+	while (flows.started > 0) {
+
+		uint32_t in[MAX_SEARCHES_SSE8];
+
+		/* Gather 4 bytes of input data for first 8 flows. */
+		in[0] = GET_NEXT_4BYTES(parms, 0);
+		in[4] = GET_NEXT_4BYTES(parms, 4);
+		in[1] = GET_NEXT_4BYTES(parms, 1);
+		in[5] = GET_NEXT_4BYTES(parms, 5);
+		in[2] = GET_NEXT_4BYTES(parms, 2);
+		in[6] = GET_NEXT_4BYTES(parms, 6);
+		in[3] = GET_NEXT_4BYTES(parms, 3);
+		in[7] = GET_NEXT_4BYTES(parms, 7);
+		input[0] = _mm256_set_epi32(in[7], in[6], in[5], in[4],
+			in[3], in[2], in[1], in[0]);
+
+		/* Gather 4 bytes of input data for last 8 flows. */
+		in[0] = GET_NEXT_4BYTES(parms, 8);
+		in[4] = GET_NEXT_4BYTES(parms, 12);
+		in[1] = GET_NEXT_4BYTES(parms, 9);
+		in[5] = GET_NEXT_4BYTES(parms, 13);
+		in[2] = GET_NEXT_4BYTES(parms, 10);
+		in[6] = GET_NEXT_4BYTES(parms, 14);
+		in[3] = GET_NEXT_4BYTES(parms, 11);
+		in[7] = GET_NEXT_4BYTES(parms, 15);
+		input[1] = _mm256_set_epi32(in[7], in[6], in[5], in[4],
+			in[3], in[2], in[1], in[0]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		 /* Check for any matches. */
+		acl_match_check_avx2x8(ctx, parms, &flows, 0,
+			&tr_lo[0], &tr_hi[0], ymm_match_mask.y);
+		acl_match_check_avx2x8(ctx, parms, &flows, 8,
+			&tr_lo[1], &tr_hi[1], ymm_match_mask.y);
+	}
+
+	return 0;
+}
diff --git a/lib/librte_acl/acl_run_neon.c b/lib/librte_acl/acl_run_neon.c
new file mode 100644
index 00000000..b0144514
--- /dev/null
+++ b/lib/librte_acl/acl_run_neon.c
@@ -0,0 +1,46 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run_neon.h"
+
+int
+rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
+		      uint32_t *results, uint32_t num, uint32_t categories)
+{
+	if (likely(num >= 8))
+		return search_neon_8(ctx, data, results, num, categories);
+	else if (num >= 4)
+		return search_neon_4(ctx, data, results, num, categories);
+	else
+		return rte_acl_classify_scalar(ctx, data, results, num,
+			categories);
+}
diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h
new file mode 100644
index 00000000..d233ff00
--- /dev/null
+++ b/lib/librte_acl/acl_run_neon.h
@@ -0,0 +1,289 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+struct _neon_acl_const {
+	rte_xmm_t xmm_shuffle_input;
+	rte_xmm_t xmm_index_mask;
+	rte_xmm_t range_base;
+} neon_acl_const  __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
+	{
+		.u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}
+	},
+	{
+		.u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
+	},
+	{
+		.u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c}
+	},
+};
+
+/*
+ * Resolve priority for multiple results (neon version).
+ * This consists comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+		      struct parms *parms,
+		      const struct rte_acl_match_results *p,
+		      uint32_t categories)
+{
+	uint32_t x;
+	int32x4_t results, priority, results1, priority1;
+	uint32x4_t selector;
+	int32_t *saved_results, *saved_priority;
+
+	for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+		saved_results = (int32_t *)(&parms[n].cmplt->results[x]);
+		saved_priority = (int32_t *)(&parms[n].cmplt->priority[x]);
+
+		/* get results and priorities for completed trie */
+		results = vld1q_s32(
+			(const int32_t *)&p[transition].results[x]);
+		priority = vld1q_s32(
+			(const int32_t *)&p[transition].priority[x]);
+
+		/* if this is not the first completed trie */
+		if (parms[n].cmplt->count != ctx->num_tries) {
+			/* get running best results and their priorities */
+			results1 = vld1q_s32(saved_results);
+			priority1 = vld1q_s32(saved_priority);
+
+			/* select results that are highest priority */
+			selector = vcgtq_s32(priority1, priority);
+			results = vbslq_s32(selector, results1, results);
+			priority = vbslq_s32(selector, priority1, priority);
+		}
+
+		/* save running best results and their priorities */
+		vst1q_s32(saved_results, results);
+		vst1q_s32(saved_priority, priority);
+	}
+}
+
+/*
+ * Check for any match in 4 transitions
+ */
+static inline __attribute__((always_inline)) uint32_t
+check_any_match_x4(uint64_t val[])
+{
+	return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
+}
+
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+		   struct acl_flow_data *flows, uint64_t transitions[])
+{
+	while (check_any_match_x4(transitions)) {
+		transitions[0] = acl_match_check(transitions[0], slot, ctx,
+			parms, flows, resolve_priority_neon);
+		transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
+			parms, flows, resolve_priority_neon);
+		transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
+			parms, flows, resolve_priority_neon);
+		transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
+			parms, flows, resolve_priority_neon);
+	}
+}
+
+/*
+ * Process 4 transitions (in 2 NEON Q registers) in parallel
+ */
+static inline __attribute__((always_inline)) int32x4_t
+transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
+{
+	int32x4x2_t tr_hi_lo;
+	int32x4_t t, in, r;
+	uint32x4_t index_msk, node_type, addr;
+	uint32x4_t dfa_msk, mask, quad_ofs, dfa_ofs;
+
+	/* Move low 32 into tr_hi_lo.val[0] and high 32 into tr_hi_lo.val[1] */
+	tr_hi_lo = vld2q_s32((const int32_t *)transitions);
+
+	/* Calculate the address (array index) for all 4 transitions. */
+
+	index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask);
+
+	/* Calc node type and node addr */
+	node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
+	addr = vandq_s32(tr_hi_lo.val[0], index_msk);
+
+	/* t = 0 */
+	t = veorq_s32(node_type, node_type);
+
+	/* mask for DFA type(0) nodes */
+	dfa_msk = vceqq_u32(node_type, t);
+
+	mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
+	in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
+
+	/* DFA calculations. */
+	r = vshrq_n_u32(in, 30); /* div by 64 */
+	mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base);
+	r = vaddq_u8(r, mask);
+	t = vshrq_n_u32(in, 24);
+	r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
+	dfa_ofs = vsubq_s32(t, r);
+
+	/* QUAD/SINGLE calculations. */
+	t = vcgtq_s8(in, tr_hi_lo.val[1]);
+	t = vabsq_s8(t);
+	t = vpaddlq_u8(t);
+	quad_ofs = vpaddlq_u16(t);
+
+	/* blend DFA and QUAD/SINGLE. */
+	t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs);
+
+	/* calculate address for next transitions */
+	addr = vaddq_u32(addr, t);
+
+	/* Fill next transitions */
+	transitions[0] = trans[vgetq_lane_u32(addr, 0)];
+	transitions[1] = trans[vgetq_lane_u32(addr, 1)];
+	transitions[2] = trans[vgetq_lane_u32(addr, 2)];
+	transitions[3] = trans[vgetq_lane_u32(addr, 3)];
+
+	return vshrq_n_u32(next_input, CHAR_BIT);
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline int
+search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	      uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[8];
+	struct completion cmplt[8];
+	struct parms parms[8];
+	int32x4_t input0, input1;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		     total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < 8; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	 /* Check for any matches. */
+	acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
+	acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
+
+	while (flows.started > 0) {
+		/* Gather 4 bytes of input data for each stream. */
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 4), input1, 0);
+
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input0, 1);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 5), input1, 1);
+
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input0, 2);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 6), input1, 2);
+
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input0, 3);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 7), input1, 3);
+
+		/* Process the 4 bytes of input on each stream. */
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		 /* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
+		acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
+	}
+
+	return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline int
+search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	      uint32_t *results, int total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[4];
+	struct completion cmplt[4];
+	struct parms parms[4];
+	int32x4_t input;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		     total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < 4; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/* Check for any matches. */
+	acl_match_check_x4(0, ctx, parms, &flows, index_array);
+
+	while (flows.started > 0) {
+		/* Gather 4 bytes of input data for each stream. */
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0);
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input, 1);
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input, 2);
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input, 3);
+
+		/* Process the 4 bytes of input on each stream. */
+		input = transition4(input, flows.trans, index_array);
+		input = transition4(input, flows.trans, index_array);
+		input = transition4(input, flows.trans, index_array);
+		input = transition4(input, flows.trans, index_array);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows, index_array);
+	}
+
+	return 0;
+}
diff --git a/lib/librte_acl/acl_run_scalar.c b/lib/librte_acl/acl_run_scalar.c
new file mode 100644
index 00000000..5be216c6
--- /dev/null
+++ b/lib/librte_acl/acl_run_scalar.c
@@ -0,0 +1,192 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run.h"
+
+/*
+ * Resolve priority for multiple results (scalar version).
+ * This consists comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_scalar(uint64_t transition, int n,
+	const struct rte_acl_ctx *ctx, struct parms *parms,
+	const struct rte_acl_match_results *p, uint32_t categories)
+{
+	uint32_t i;
+	int32_t *saved_priority;
+	uint32_t *saved_results;
+	const int32_t *priority;
+	const uint32_t *results;
+
+	saved_results = parms[n].cmplt->results;
+	saved_priority = parms[n].cmplt->priority;
+
+	/* results and priorities for completed trie */
+	results = p[transition].results;
+	priority = p[transition].priority;
+
+	/* if this is not the first completed trie */
+	if (parms[n].cmplt->count != ctx->num_tries) {
+		for (i = 0; i < categories; i += RTE_ACL_RESULTS_MULTIPLIER) {
+
+			if (saved_priority[i] <= priority[i]) {
+				saved_priority[i] = priority[i];
+				saved_results[i] = results[i];
+			}
+			if (saved_priority[i + 1] <= priority[i + 1]) {
+				saved_priority[i + 1] = priority[i + 1];
+				saved_results[i + 1] = results[i + 1];
+			}
+			if (saved_priority[i + 2] <= priority[i + 2]) {
+				saved_priority[i + 2] = priority[i + 2];
+				saved_results[i + 2] = results[i + 2];
+			}
+			if (saved_priority[i + 3] <= priority[i + 3]) {
+				saved_priority[i + 3] = priority[i + 3];
+				saved_results[i + 3] = results[i + 3];
+			}
+		}
+	} else {
+		for (i = 0; i < categories; i += RTE_ACL_RESULTS_MULTIPLIER) {
+			saved_priority[i] = priority[i];
+			saved_priority[i + 1] = priority[i + 1];
+			saved_priority[i + 2] = priority[i + 2];
+			saved_priority[i + 3] = priority[i + 3];
+
+			saved_results[i] = results[i];
+			saved_results[i + 1] = results[i + 1];
+			saved_results[i + 2] = results[i + 2];
+			saved_results[i + 3] = results[i + 3];
+		}
+	}
+}
+
+static inline uint32_t
+scan_forward(uint32_t input, uint32_t max)
+{
+	return (input == 0) ? max : rte_bsf32(input);
+}
+
+static inline uint64_t
+scalar_transition(const uint64_t *trans_table, uint64_t transition,
+	uint8_t input)
+{
+	uint32_t addr, index, ranges, x, a, b, c;
+
+	/* break transition into component parts */
+	ranges = transition >> (sizeof(index) * CHAR_BIT);
+	index = transition & ~RTE_ACL_NODE_INDEX;
+	addr = transition ^ index;
+
+	if (index != RTE_ACL_NODE_DFA) {
+		/* calc address for a QRANGE/SINGLE node */
+		c = (uint32_t)input * SCALAR_QRANGE_MULT;
+		a = ranges | SCALAR_QRANGE_MIN;
+		a -= (c & SCALAR_QRANGE_MASK);
+		b = c & SCALAR_QRANGE_MIN;
+		a &= SCALAR_QRANGE_MIN;
+		a ^= (ranges ^ b) & (a ^ b);
+		x = scan_forward(a, 32) >> 3;
+	} else {
+		/* calc address for a DFA node */
+		x = ranges >> (input /
+			RTE_ACL_DFA_GR64_SIZE * RTE_ACL_DFA_GR64_BIT);
+		x &= UINT8_MAX;
+		x = input - x;
+	}
+
+	addr += x;
+
+	/* pickup next transition */
+	transition = *(trans_table + addr);
+	return transition;
+}
+
+int
+rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	int n;
+	uint64_t transition0, transition1;
+	uint32_t input0, input1;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_SCALAR];
+	struct completion cmplt[MAX_SEARCHES_SCALAR];
+	struct parms parms[MAX_SEARCHES_SCALAR];
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, num,
+		categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_SCALAR; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	transition0 = index_array[0];
+	transition1 = index_array[1];
+
+	while ((transition0 | transition1) & RTE_ACL_NODE_MATCH) {
+		transition0 = acl_match_check(transition0,
+			0, ctx, parms, &flows, resolve_priority_scalar);
+		transition1 = acl_match_check(transition1,
+			1, ctx, parms, &flows, resolve_priority_scalar);
+	}
+
+	while (flows.started > 0) {
+
+		input0 = GET_NEXT_4BYTES(parms, 0);
+		input1 = GET_NEXT_4BYTES(parms, 1);
+
+		for (n = 0; n < 4; n++) {
+
+			transition0 = scalar_transition(flows.trans,
+				transition0, (uint8_t)input0);
+			input0 >>= CHAR_BIT;
+
+			transition1 = scalar_transition(flows.trans,
+				transition1, (uint8_t)input1);
+			input1 >>= CHAR_BIT;
+		}
+
+		while ((transition0 | transition1) & RTE_ACL_NODE_MATCH) {
+			transition0 = acl_match_check(transition0,
+				0, ctx, parms, &flows, resolve_priority_scalar);
+			transition1 = acl_match_check(transition1,
+				1, ctx, parms, &flows, resolve_priority_scalar);
+		}
+	}
+	return 0;
+}
diff --git a/lib/librte_acl/acl_run_sse.c b/lib/librte_acl/acl_run_sse.c
new file mode 100644
index 00000000..a5a7d361
--- /dev/null
+++ b/lib/librte_acl/acl_run_sse.c
@@ -0,0 +1,47 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run_sse.h"
+
+int
+rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	if (likely(num >= MAX_SEARCHES_SSE8))
+		return search_sse_8(ctx, data, results, num, categories);
+	else if (num >= MAX_SEARCHES_SSE4)
+		return search_sse_4(ctx, data, results, num, categories);
+	else
+		return rte_acl_classify_scalar(ctx, data, results, num,
+			categories);
+}
diff --git a/lib/librte_acl/acl_run_sse.h b/lib/librte_acl/acl_run_sse.h
new file mode 100644
index 00000000..ad40a674
--- /dev/null
+++ b/lib/librte_acl/acl_run_sse.h
@@ -0,0 +1,357 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+enum {
+	SHUFFLE32_SLOT1 = 0xe5,
+	SHUFFLE32_SLOT2 = 0xe6,
+	SHUFFLE32_SLOT3 = 0xe7,
+	SHUFFLE32_SWAP64 = 0x4e,
+};
+
+static const rte_xmm_t xmm_shuffle_input = {
+	.u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c},
+};
+
+static const rte_xmm_t xmm_ones_16 = {
+	.u16 = {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+static const rte_xmm_t xmm_match_mask = {
+	.u32 = {
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+	},
+};
+
+static const rte_xmm_t xmm_index_mask = {
+	.u32 = {
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+	},
+};
+
+static const rte_xmm_t xmm_range_base = {
+	.u32 = {
+		0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+	},
+};
+
+/*
+ * Resolve priority for multiple results (sse version).
+ * This consists comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_sse(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+	struct parms *parms, const struct rte_acl_match_results *p,
+	uint32_t categories)
+{
+	uint32_t x;
+	xmm_t results, priority, results1, priority1, selector;
+	xmm_t *saved_results, *saved_priority;
+
+	for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+
+		saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+		saved_priority =
+			(xmm_t *)(&parms[n].cmplt->priority[x]);
+
+		/* get results and priorities for completed trie */
+		results = _mm_loadu_si128(
+			(const xmm_t *)&p[transition].results[x]);
+		priority = _mm_loadu_si128(
+			(const xmm_t *)&p[transition].priority[x]);
+
+		/* if this is not the first completed trie */
+		if (parms[n].cmplt->count != ctx->num_tries) {
+
+			/* get running best results and their priorities */
+			results1 = _mm_loadu_si128(saved_results);
+			priority1 = _mm_loadu_si128(saved_priority);
+
+			/* select results that are highest priority */
+			selector = _mm_cmpgt_epi32(priority1, priority);
+			results = _mm_blendv_epi8(results, results1, selector);
+			priority = _mm_blendv_epi8(priority, priority1,
+				selector);
+		}
+
+		/* save running best results and their priorities */
+		_mm_storeu_si128(saved_results, results);
+		_mm_storeu_si128(saved_priority, priority);
+	}
+}
+
+/*
+ * Extract transitions from an XMM register and check for any matches
+ */
+static void
+acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx,
+	struct parms *parms, struct acl_flow_data *flows)
+{
+	uint64_t transition1, transition2;
+
+	/* extract transition from low 64 bits. */
+	transition1 = _mm_cvtsi128_si64(*indices);
+
+	/* extract transition from high 64 bits. */
+	*indices = _mm_shuffle_epi32(*indices, SHUFFLE32_SWAP64);
+	transition2 = _mm_cvtsi128_si64(*indices);
+
+	transition1 = acl_match_check(transition1, slot, ctx,
+		parms, flows, resolve_priority_sse);
+	transition2 = acl_match_check(transition2, slot + 1, ctx,
+		parms, flows, resolve_priority_sse);
+
+	/* update indices with new transitions. */
+	*indices = _mm_set_epi64x(transition2, transition1);
+}
+
+/*
+ * Check for any match in 4 transitions (contained in 2 SSE registers)
+ */
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2,
+	xmm_t match_mask)
+{
+	xmm_t temp;
+
+	/* put low 32 bits of each transition into one register */
+	temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, (__m128)*indices2,
+		0x88);
+	/* test for match node */
+	temp = _mm_and_si128(match_mask, temp);
+
+	while (!_mm_testz_si128(temp, temp)) {
+		acl_process_matches(indices1, slot, ctx, parms, flows);
+		acl_process_matches(indices2, slot + 2, ctx, parms, flows);
+
+		temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1,
+					(__m128)*indices2,
+					0x88);
+		temp = _mm_and_si128(match_mask, temp);
+	}
+}
+
+/*
+ * Process 4 transitions (in 2 XMM registers) in parallel
+ */
+static inline __attribute__((always_inline)) xmm_t
+transition4(xmm_t next_input, const uint64_t *trans,
+	xmm_t *indices1, xmm_t *indices2)
+{
+	xmm_t addr, tr_lo, tr_hi;
+	uint64_t trans0, trans2;
+
+	/* Shuffle low 32 into tr_lo and high 32 into tr_hi */
+	ACL_TR_HILO(mm, __m128, *indices1, *indices2, tr_lo, tr_hi);
+
+	 /* Calculate the address (array index) for all 4 transitions. */
+	ACL_TR_CALC_ADDR(mm, 128, addr, xmm_index_mask.x, next_input,
+		xmm_shuffle_input.x, xmm_ones_16.x, xmm_range_base.x,
+		tr_lo, tr_hi);
+
+	 /* Gather 64 bit transitions and pack back into 2 registers. */
+
+	trans0 = trans[_mm_cvtsi128_si32(addr)];
+
+	/* get slot 2 */
+
+	/* {x0, x1, x2, x3} -> {x2, x1, x2, x3} */
+	addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT2);
+	trans2 = trans[_mm_cvtsi128_si32(addr)];
+
+	/* get slot 1 */
+
+	/* {x2, x1, x2, x3} -> {x1, x1, x2, x3} */
+	addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT1);
+	*indices1 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans0);
+
+	/* get slot 3 */
+
+	/* {x1, x1, x2, x3} -> {x3, x1, x2, x3} */
+	addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT3);
+	*indices2 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans2);
+
+	return _mm_srli_epi32(next_input, CHAR_BIT);
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline int
+search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_SSE8];
+	struct completion cmplt[MAX_SEARCHES_SSE8];
+	struct parms parms[MAX_SEARCHES_SSE8];
+	xmm_t input0, input1;
+	xmm_t indices1, indices2, indices3, indices4;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_SSE8; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/*
+	 * indices1 contains index_array[0,1]
+	 * indices2 contains index_array[2,3]
+	 * indices3 contains index_array[4,5]
+	 * indices4 contains index_array[6,7]
+	 */
+
+	indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]);
+	indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]);
+
+	indices3 = _mm_loadu_si128((xmm_t *) &index_array[4]);
+	indices4 = _mm_loadu_si128((xmm_t *) &index_array[6]);
+
+	 /* Check for any matches. */
+	acl_match_check_x4(0, ctx, parms, &flows,
+		&indices1, &indices2, xmm_match_mask.x);
+	acl_match_check_x4(4, ctx, parms, &flows,
+		&indices3, &indices4, xmm_match_mask.x);
+
+	while (flows.started > 0) {
+
+		/* Gather 4 bytes of input data for each stream. */
+		input0 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
+		input1 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 4));
+
+		input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 1), 1);
+		input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 5), 1);
+
+		input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 2), 2);
+		input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 6), 2);
+
+		input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 3), 3);
+		input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 7), 3);
+
+		 /* Process the 4 bytes of input on each stream. */
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		 /* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows,
+			&indices1, &indices2, xmm_match_mask.x);
+		acl_match_check_x4(4, ctx, parms, &flows,
+			&indices3, &indices4, xmm_match_mask.x);
+	}
+
+	return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline int
+search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	 uint32_t *results, int total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_SSE4];
+	struct completion cmplt[MAX_SEARCHES_SSE4];
+	struct parms parms[MAX_SEARCHES_SSE4];
+	xmm_t input, indices1, indices2;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_SSE4; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]);
+	indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]);
+
+	/* Check for any matches. */
+	acl_match_check_x4(0, ctx, parms, &flows,
+		&indices1, &indices2, xmm_match_mask.x);
+
+	while (flows.started > 0) {
+
+		/* Gather 4 bytes of input data for each stream. */
+		input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
+		input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 1), 1);
+		input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 2), 2);
+		input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 3), 3);
+
+		/* Process the 4 bytes of input on each stream. */
+		input = transition4(input, flows.trans, &indices1, &indices2);
+		input = transition4(input, flows.trans, &indices1, &indices2);
+		input = transition4(input, flows.trans, &indices1, &indices2);
+		input = transition4(input, flows.trans, &indices1, &indices2);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows,
+			&indices1, &indices2, xmm_match_mask.x);
+	}
+
+	return 0;
+}
diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h
new file mode 100644
index 00000000..6cc19997
--- /dev/null
+++ b/lib/librte_acl/acl_vect.h
@@ -0,0 +1,116 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_VECT_H_
+#define _RTE_ACL_VECT_H_
+
+/**
+ * @file
+ *
+ * RTE ACL SSE/AVX related header.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Takes 2 SIMD registers containing N transitions eachi (tr0, tr1).
+ * Shuffles it into different representation:
+ * lo - contains low 32 bits of given N transitions.
+ * hi - contains high 32 bits of given N transitions.
+ */
+#define	ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                        do { \
+	lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88);  \
+	hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd);  \
+} while (0)
+
+
+/*
+ * Calculate the address of the next transition for
+ * all types of nodes. Note that only DFA nodes and range
+ * nodes actually transition to another node. Match
+ * nodes not supposed to be encountered here.
+ * For quad range nodes:
+ * Calculate number of range boundaries that are less than the
+ * input value. Range boundaries for each node are in signed 8 bit,
+ * ordered from -128 to 127.
+ * This is effectively a popcnt of bytes that are greater than the
+ * input byte.
+ * Single nodes are processed in the same ways as quad range nodes.
+*/
+#define ACL_TR_CALC_ADDR(P, S,					\
+	addr, index_mask, next_input, shuffle_input,		\
+	ones_16, range_base, tr_lo, tr_hi)               do {	\
+								\
+	typeof(addr) in, node_type, r, t;			\
+	typeof(addr) dfa_msk, dfa_ofs, quad_ofs;		\
+								\
+	t = _##P##_xor_si##S(index_mask, index_mask);		\
+	in = _##P##_shuffle_epi8(next_input, shuffle_input);	\
+								\
+	/* Calc node type and node addr */			\
+	node_type = _##P##_andnot_si##S(index_mask, tr_lo);	\
+	addr = _##P##_and_si##S(index_mask, tr_lo);		\
+								\
+	/* mask for DFA type(0) nodes */			\
+	dfa_msk = _##P##_cmpeq_epi32(node_type, t);		\
+								\
+	/* DFA calculations. */					\
+	r = _##P##_srli_epi32(in, 30);				\
+	r = _##P##_add_epi8(r, range_base);			\
+	t = _##P##_srli_epi32(in, 24);				\
+	r = _##P##_shuffle_epi8(tr_hi, r);			\
+								\
+	dfa_ofs = _##P##_sub_epi32(t, r);			\
+								\
+	/* QUAD/SINGLE caluclations. */				\
+	t = _##P##_cmpgt_epi8(in, tr_hi);			\
+	t = _##P##_sign_epi8(t, t);				\
+	t = _##P##_maddubs_epi16(t, t);				\
+	quad_ofs = _##P##_madd_epi16(t, ones_16);		\
+								\
+	/* blend DFA and QUAD/SINGLE. */			\
+	t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);	\
+								\
+	/* calculate address for next transitions. */		\
+	addr = _##P##_add_epi32(addr, t);			\
+} while (0)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_VECT_H_ */
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
new file mode 100644
index 00000000..4ba9786b
--- /dev/null
+++ b/lib/librte_acl/rte_acl.c
@@ -0,0 +1,393 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "acl.h"
+
+TAILQ_HEAD(rte_acl_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_acl_tailq = {
+	.name = "RTE_ACL",
+};
+EAL_REGISTER_TAILQ(rte_acl_tailq)
+
+/*
+ * If the compiler doesn't support AVX2 instructions,
+ * then the dummy one would be used instead for AVX2 classify method.
+ */
+int __attribute__ ((weak))
+rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP;
+}
+
+int __attribute__ ((weak))
+rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP;
+}
+
+int __attribute__ ((weak))
+rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP;
+}
+
+static const rte_acl_classify_t classify_fns[] = {
+	[RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
+	[RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar,
+	[RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse,
+	[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
+	[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
+};
+
+/* by default, use always available scalar code path. */
+static enum rte_acl_classify_alg rte_acl_default_classify =
+	RTE_ACL_CLASSIFY_SCALAR;
+
+static void
+rte_acl_set_default_classify(enum rte_acl_classify_alg alg)
+{
+	rte_acl_default_classify = alg;
+}
+
+extern int
+rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, enum rte_acl_classify_alg alg)
+{
+	if (ctx == NULL || (uint32_t)alg >= RTE_DIM(classify_fns))
+		return -EINVAL;
+
+	ctx->alg = alg;
+	return 0;
+}
+
+/*
+ * Select highest available classify method as default one.
+ * Note that CLASSIFY_AVX2 should be set as a default only
+ * if both conditions are met:
+ * at build time compiler supports AVX2 and target cpu supports AVX2.
+ */
+static void __attribute__((constructor))
+rte_acl_init(void)
+{
+	enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
+
+#if defined(RTE_ARCH_ARM64)
+	alg =  RTE_ACL_CLASSIFY_NEON;
+#elif defined(RTE_ARCH_ARM)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+		alg =  RTE_ACL_CLASSIFY_NEON;
+#else
+#ifdef CC_AVX2_SUPPORT
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+		alg = RTE_ACL_CLASSIFY_AVX2;
+	else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
+#else
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1))
+#endif
+		alg = RTE_ACL_CLASSIFY_SSE;
+
+#endif
+	rte_acl_set_default_classify(alg);
+}
+
+int
+rte_acl_classify_alg(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories,
+	enum rte_acl_classify_alg alg)
+{
+	if (categories != 1 &&
+			((RTE_ACL_RESULTS_MULTIPLIER - 1) & categories) != 0)
+		return -EINVAL;
+
+	return classify_fns[alg](ctx, data, results, num, categories);
+}
+
+int
+rte_acl_classify(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
+{
+	return rte_acl_classify_alg(ctx, data, results, num, categories,
+		ctx->alg);
+}
+
+struct rte_acl_ctx *
+rte_acl_find_existing(const char *name)
+{
+	struct rte_acl_ctx *ctx = NULL;
+	struct rte_acl_list *acl_list;
+	struct rte_tailq_entry *te;
+
+	acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, acl_list, next) {
+		ctx = (struct rte_acl_ctx *) te->data;
+		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+	return ctx;
+}
+
+void
+rte_acl_free(struct rte_acl_ctx *ctx)
+{
+	struct rte_acl_list *acl_list;
+	struct rte_tailq_entry *te;
+
+	if (ctx == NULL)
+		return;
+
+	acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find our tailq entry */
+	TAILQ_FOREACH(te, acl_list, next) {
+		if (te->data == (void *) ctx)
+			break;
+	}
+	if (te == NULL) {
+		rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+		return;
+	}
+
+	TAILQ_REMOVE(acl_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_free(ctx->mem);
+	rte_free(ctx);
+	rte_free(te);
+}
+
+struct rte_acl_ctx *
+rte_acl_create(const struct rte_acl_param *param)
+{
+	size_t sz;
+	struct rte_acl_ctx *ctx;
+	struct rte_acl_list *acl_list;
+	struct rte_tailq_entry *te;
+	char name[sizeof(ctx->name)];
+
+	acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list);
+
+	/* check that input parameters are valid. */
+	if (param == NULL || param->name == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(name, sizeof(name), "ACL_%s", param->name);
+
+	/* calculate amount of memory required for pattern set. */
+	sz = sizeof(*ctx) + param->max_rule_num * param->rule_size;
+
+	/* get EAL TAILQ lock. */
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* if we already have one with that name */
+	TAILQ_FOREACH(te, acl_list, next) {
+		ctx = (struct rte_acl_ctx *) te->data;
+		if (strncmp(param->name, ctx->name, sizeof(ctx->name)) == 0)
+			break;
+	}
+
+	/* if ACL with such name doesn't exist, then create a new one. */
+	if (te == NULL) {
+		ctx = NULL;
+		te = rte_zmalloc("ACL_TAILQ_ENTRY", sizeof(*te), 0);
+
+		if (te == NULL) {
+			RTE_LOG(ERR, ACL, "Cannot allocate tailq entry!\n");
+			goto exit;
+		}
+
+		ctx = rte_zmalloc_socket(name, sz, RTE_CACHE_LINE_SIZE, param->socket_id);
+
+		if (ctx == NULL) {
+			RTE_LOG(ERR, ACL,
+				"allocation of %zu bytes on socket %d for %s failed\n",
+				sz, param->socket_id, name);
+			rte_free(te);
+			goto exit;
+		}
+		/* init new allocated context. */
+		ctx->rules = ctx + 1;
+		ctx->max_rules = param->max_rule_num;
+		ctx->rule_sz = param->rule_size;
+		ctx->socket_id = param->socket_id;
+		ctx->alg = rte_acl_default_classify;
+		snprintf(ctx->name, sizeof(ctx->name), "%s", param->name);
+
+		te->data = (void *) ctx;
+
+		TAILQ_INSERT_TAIL(acl_list, te, next);
+	}
+
+exit:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+	return ctx;
+}
+
+static int
+acl_add_rules(struct rte_acl_ctx *ctx, const void *rules, uint32_t num)
+{
+	uint8_t *pos;
+
+	if (num + ctx->num_rules > ctx->max_rules)
+		return -ENOMEM;
+
+	pos = ctx->rules;
+	pos += ctx->rule_sz * ctx->num_rules;
+	memcpy(pos, rules, num * ctx->rule_sz);
+	ctx->num_rules += num;
+
+	return 0;
+}
+
+static int
+acl_check_rule(const struct rte_acl_rule_data *rd)
+{
+	if ((RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES, typeof(rd->category_mask)) &
+			rd->category_mask) == 0 ||
+			rd->priority > RTE_ACL_MAX_PRIORITY ||
+			rd->priority < RTE_ACL_MIN_PRIORITY ||
+			rd->userdata == RTE_ACL_INVALID_USERDATA)
+		return -EINVAL;
+	return 0;
+}
+
+int
+rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules,
+	uint32_t num)
+{
+	const struct rte_acl_rule *rv;
+	uint32_t i;
+	int32_t rc;
+
+	if (ctx == NULL || rules == NULL || 0 == ctx->rule_sz)
+		return -EINVAL;
+
+	for (i = 0; i != num; i++) {
+		rv = (const struct rte_acl_rule *)
+			((uintptr_t)rules + i * ctx->rule_sz);
+		rc = acl_check_rule(&rv->data);
+		if (rc != 0) {
+			RTE_LOG(ERR, ACL, "%s(%s): rule #%u is invalid\n",
+				__func__, ctx->name, i + 1);
+			return rc;
+		}
+	}
+
+	return acl_add_rules(ctx, rules, num);
+}
+
+/*
+ * Reset all rules.
+ * Note that RT structures are not affected.
+ */
+void
+rte_acl_reset_rules(struct rte_acl_ctx *ctx)
+{
+	if (ctx != NULL)
+		ctx->num_rules = 0;
+}
+
+/*
+ * Reset all rules and destroys RT structures.
+ */
+void
+rte_acl_reset(struct rte_acl_ctx *ctx)
+{
+	if (ctx != NULL) {
+		rte_acl_reset_rules(ctx);
+		rte_acl_build(ctx, &ctx->config);
+	}
+}
+
+/*
+ * Dump ACL context to the stdout.
+ */
+void
+rte_acl_dump(const struct rte_acl_ctx *ctx)
+{
+	if (!ctx)
+		return;
+	printf("acl context <%s>@%p\n", ctx->name, ctx);
+	printf("  socket_id=%"PRId32"\n", ctx->socket_id);
+	printf("  alg=%"PRId32"\n", ctx->alg);
+	printf("  max_rules=%"PRIu32"\n", ctx->max_rules);
+	printf("  rule_size=%"PRIu32"\n", ctx->rule_sz);
+	printf("  num_rules=%"PRIu32"\n", ctx->num_rules);
+	printf("  num_categories=%"PRIu32"\n", ctx->num_categories);
+	printf("  num_tries=%"PRIu32"\n", ctx->num_tries);
+}
+
+/*
+ * Dump all ACL contexts to the stdout.
+ */
+void
+rte_acl_list_dump(void)
+{
+	struct rte_acl_ctx *ctx;
+	struct rte_acl_list *acl_list;
+	struct rte_tailq_entry *te;
+
+	acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, acl_list, next) {
+		ctx = (struct rte_acl_ctx *) te->data;
+		rte_acl_dump(ctx);
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+}
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
new file mode 100644
index 00000000..0979a098
--- /dev/null
+++ b/lib/librte_acl/rte_acl.h
@@ -0,0 +1,388 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_H_
+#define _RTE_ACL_H_
+
+/**
+ * @file
+ *
+ * RTE Classifier.
+ */
+
+#include <rte_acl_osdep.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	RTE_ACL_MAX_CATEGORIES	16
+
+#define	RTE_ACL_RESULTS_MULTIPLIER	(XMM_SIZE / sizeof(uint32_t))
+
+#define RTE_ACL_MAX_LEVELS 64
+#define RTE_ACL_MAX_FIELDS 64
+
+union rte_acl_field_types {
+	uint8_t  u8;
+	uint16_t u16;
+	uint32_t u32;
+	uint64_t u64;
+};
+
+enum {
+	RTE_ACL_FIELD_TYPE_MASK = 0,
+	RTE_ACL_FIELD_TYPE_RANGE,
+	RTE_ACL_FIELD_TYPE_BITMASK
+};
+
+/**
+ * ACL Field definition.
+ * Each field in the ACL rule has an associate definition.
+ * It defines the type of field, its size, its offset in the input buffer,
+ * the field index, and the input index.
+ * For performance reasons, the inner loop of the search function is unrolled
+ * to process four input bytes at a time. This requires the input to be grouped
+ * into sets of 4 consecutive bytes. The loop processes the first input byte as
+ * part of the setup and then subsequent bytes must be in groups of 4
+ * consecutive bytes.
+ */
+struct rte_acl_field_def {
+	uint8_t  type;        /**< type - RTE_ACL_FIELD_TYPE_*. */
+	uint8_t	 size;        /**< size of field 1,2,4, or 8. */
+	uint8_t	 field_index; /**< index of field inside the rule. */
+	uint8_t  input_index; /**< 0-N input index. */
+	uint32_t offset;      /**< offset to start of field. */
+};
+
+/**
+ * ACL build configuration.
+ * Defines the fields of an ACL trie and number of categories to build with.
+ */
+struct rte_acl_config {
+	uint32_t num_categories; /**< Number of categories to build with. */
+	uint32_t num_fields;     /**< Number of field definitions. */
+	struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS];
+	/**< array of field definitions. */
+	size_t max_size;
+	/**< max memory limit for internal run-time structures. */
+};
+
+/**
+ * Defines the value of a field for a rule.
+ */
+struct rte_acl_field {
+	union rte_acl_field_types value;
+	/**< a 1,2,4, or 8 byte value of the field. */
+	union rte_acl_field_types mask_range;
+	/**<
+	 * depending on field type:
+	 * mask -> 1.2.3.4/32 value=0x1020304, mask_range=32,
+	 * range -> 0 : 65535 value=0, mask_range=65535,
+	 * bitmask -> 0x06/0xff value=6, mask_range=0xff.
+	 */
+};
+
+enum {
+	RTE_ACL_TYPE_SHIFT = 29,
+	RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t),
+	RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX,
+	RTE_ACL_MIN_PRIORITY = 0,
+};
+
+#define	RTE_ACL_INVALID_USERDATA	0
+
+#define	RTE_ACL_MASKLEN_TO_BITMASK(v, s)	\
+((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v))))
+
+/**
+ * Miscellaneous data for ACL rule.
+ */
+struct rte_acl_rule_data {
+	uint32_t category_mask; /**< Mask of categories for that rule. */
+	int32_t  priority;      /**< Priority for that rule. */
+	uint32_t userdata;      /**< Associated with the rule user data. */
+};
+
+/**
+ * Defines single ACL rule.
+ * data - miscellaneous data for the rule.
+ * field[] - value and mask or range for each field.
+ */
+#define	RTE_ACL_RULE_DEF(name, fld_num)	struct name {\
+	struct rte_acl_rule_data data;               \
+	struct rte_acl_field field[fld_num];         \
+}
+
+RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+
+#define	RTE_ACL_RULE_SZ(fld_num)	\
+	(sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
+
+
+/** Max number of characters in name.*/
+#define	RTE_ACL_NAMESIZE		32
+
+/**
+ * Parameters used when creating the ACL context.
+ */
+struct rte_acl_param {
+	const char *name;         /**< Name of the ACL context. */
+	int         socket_id;    /**< Socket ID to allocate memory for. */
+	uint32_t    rule_size;    /**< Size of each rule. */
+	uint32_t    max_rule_num; /**< Maximum number of rules. */
+};
+
+
+/**
+ * Create a new ACL context.
+ *
+ * @param param
+ *   Parameters used to create and initialise the ACL context.
+ * @return
+ *   Pointer to ACL context structure that is used in future ACL
+ *   operations, or NULL on error, with error code set in rte_errno.
+ *   Possible rte_errno errors include:
+ *   - EINVAL - invalid parameter passed to function
+ */
+struct rte_acl_ctx *
+rte_acl_create(const struct rte_acl_param *param);
+
+/**
+ * Find an existing ACL context object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the ACL context as passed to rte_acl_create()
+ * @return
+ *   Pointer to ACL context or NULL if object not found
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - value not available for return
+ */
+struct rte_acl_ctx *
+rte_acl_find_existing(const char *name);
+
+/**
+ * De-allocate all memory used by ACL context.
+ *
+ * @param ctx
+ *   ACL context to free
+ */
+void
+rte_acl_free(struct rte_acl_ctx *ctx);
+
+/**
+ * Add rules to an existing ACL context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to add patterns to.
+ * @param rules
+ *   Array of rules to add to the ACL context.
+ *   Note that all fields in rte_acl_rule structures are expected
+ *   to be in host byte order.
+ *   Each rule expected to be in the same format and not exceed size
+ *   specified at ACL context creation time.
+ * @param num
+ *   Number of elements in the input array of rules.
+ * @return
+ *   - -ENOMEM if there is no space in the ACL context for these rules.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules,
+	uint32_t num);
+
+/**
+ * Delete all rules from the ACL context.
+ * This function is not multi-thread safe.
+ * Note that internal run-time structures are not affected.
+ *
+ * @param ctx
+ *   ACL context to delete rules from.
+ */
+void
+rte_acl_reset_rules(struct rte_acl_ctx *ctx);
+
+/**
+ * Analyze set of rules and build required internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to build.
+ * @param cfg
+ *   Pointer to struct rte_acl_config - defines build parameters.
+ * @return
+ *   - -ENOMEM if couldn't allocate enough memory.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Negative error code if operation failed.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg);
+
+/**
+ * Delete all rules from the ACL context and
+ * destroy all internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to reset.
+ */
+void
+rte_acl_reset(struct rte_acl_ctx *ctx);
+
+/**
+ *  Available implementations of ACL classify.
+ */
+enum rte_acl_classify_alg {
+	RTE_ACL_CLASSIFY_DEFAULT = 0,
+	RTE_ACL_CLASSIFY_SCALAR = 1,  /**< generic implementation. */
+	RTE_ACL_CLASSIFY_SSE = 2,     /**< requires SSE4.1 support. */
+	RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
+	RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
+	RTE_ACL_CLASSIFY_NUM          /* should always be the last one. */
+};
+
+/**
+ * Perform search for a matching ACL rule for each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that results array should be big enough to hold
+ * (categories * num) elements.
+ * Also categories parameter should be either one or multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for given input buffer and
+ * given category, then rule with highest priority will be returned as a match.
+ * Note, that it is a caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ *   ACL context to search with.
+ * @param data
+ *   Array of pointers to input data buffers to perform search.
+ *   Note that all fields in input data buffers supposed to be in network
+ *   byte order (MSB).
+ * @param results
+ *   Array of search results, *categories* results per each input data buffer.
+ * @param num
+ *   Number of elements in the input data buffers array.
+ * @param categories
+ *   Number of maximum possible matches for each input buffer, one possible
+ *   match per category.
+ * @return
+ *   zero on successful completion.
+ *   -EINVAL for incorrect arguments.
+ */
+extern int
+rte_acl_classify(const struct rte_acl_ctx *ctx,
+		 const uint8_t **data,
+		 uint32_t *results, uint32_t num,
+		 uint32_t categories);
+
+/**
+ * Perform search using specified algorithm for a matching ACL rule for
+ * each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that results array should be big enough to hold
+ * (categories * num) elements.
+ * Also categories parameter should be either one or multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for given input buffer and
+ * given category, then rule with highest priority will be returned as a match.
+ * Note, that it is a caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ *   ACL context to search with.
+ * @param data
+ *   Array of pointers to input data buffers to perform search.
+ *   Note that all fields in input data buffers supposed to be in network
+ *   byte order (MSB).
+ * @param results
+ *   Array of search results, *categories* results per each input data buffer.
+ * @param num
+ *   Number of elements in the input data buffers array.
+ * @param categories
+ *   Number of maximum possible matches for each input buffer, one possible
+ *   match per category.
+ * @param alg
+ *   Algorithm to be used for the search.
+ *   It is the caller responsibility to ensure that the value refers to the
+ *   existing algorithm, and that it could be run on the given CPU.
+ * @return
+ *   zero on successful completion.
+ *   -EINVAL for incorrect arguments.
+ */
+extern int
+rte_acl_classify_alg(const struct rte_acl_ctx *ctx,
+		 const uint8_t **data,
+		 uint32_t *results, uint32_t num,
+		 uint32_t categories,
+		 enum rte_acl_classify_alg alg);
+
+/*
+ * Override the default classifier function for a given ACL context.
+ * @param ctx
+ *   ACL context to change classify function for.
+ * @param alg
+ *   New default classify algorithm for given ACL context.
+ *   It is the caller responsibility to ensure that the value refers to the
+ *   existing algorithm, and that it could be run on the given CPU.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+extern int
+rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx,
+	enum rte_acl_classify_alg alg);
+
+/**
+ * Dump an ACL context structure to the console.
+ *
+ * @param ctx
+ *   ACL context to dump.
+ */
+void
+rte_acl_dump(const struct rte_acl_ctx *ctx);
+
+/**
+ * Dump all ACL context structures to the console.
+ */
+void
+rte_acl_list_dump(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_H_ */
diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h
new file mode 100644
index 00000000..41f7e3d4
--- /dev/null
+++ b/lib/librte_acl/rte_acl_osdep.h
@@ -0,0 +1,80 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_OSDEP_H_
+#define _RTE_ACL_OSDEP_H_
+
+/**
+ * @file
+ *
+ * RTE ACL DPDK/OS dependent file.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/queue.h>
+
+/*
+ * Common defines.
+ */
+
+#define DIM(x) RTE_DIM(x)
+
+#include <rte_common.h>
+#include <rte_vect.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_prefetch.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+
+#endif /* _RTE_ACL_OSDEP_H_ */
diff --git a/lib/librte_acl/rte_acl_version.map b/lib/librte_acl/rte_acl_version.map
new file mode 100644
index 00000000..b09370a1
--- /dev/null
+++ b/lib/librte_acl/rte_acl_version.map
@@ -0,0 +1,19 @@
+DPDK_2.0 {
+	global:
+
+	rte_acl_add_rules;
+	rte_acl_build;
+	rte_acl_classify;
+	rte_acl_classify_alg;
+	rte_acl_classify_scalar;
+	rte_acl_create;
+	rte_acl_dump;
+	rte_acl_find_existing;
+	rte_acl_free;
+	rte_acl_list_dump;
+	rte_acl_reset;
+	rte_acl_reset_rules;
+	rte_acl_set_ctx_classify;
+
+	local: *;
+};
diff --git a/lib/librte_acl/tb_mem.c b/lib/librte_acl/tb_mem.c
new file mode 100644
index 00000000..157e6080
--- /dev/null
+++ b/lib/librte_acl/tb_mem.c
@@ -0,0 +1,108 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tb_mem.h"
+
+/*
+ *  Memory management routines for temporary memory.
+ *  That memory is used only during build phase and is released after
+ *  build is finished.
+ *  Note, that tb_pool/tb_alloc() are not supposed to return NULL.
+ *  Instead, in the case of failure to allocate memory,
+ *  it would do siglongjmp(pool->fail).
+ *  It is responsibility of the caller to save the proper context/environment,
+ *  in the pool->fail before calling tb_alloc() for the given pool first time.
+ */
+
+static struct tb_mem_block *
+tb_pool(struct tb_mem_pool *pool, size_t sz)
+{
+	struct tb_mem_block *block;
+	uint8_t *ptr;
+	size_t size;
+
+	size = sz + pool->alignment - 1;
+	block = calloc(1, size + sizeof(*pool->block));
+	if (block == NULL) {
+		RTE_LOG(ERR, MALLOC, "%s(%zu)\n failed, currently allocated "
+			"by pool: %zu bytes\n", __func__, sz, pool->alloc);
+		siglongjmp(pool->fail, -ENOMEM);
+		return NULL;
+	}
+
+	block->pool = pool;
+
+	block->next = pool->block;
+	pool->block = block;
+
+	pool->alloc += size;
+
+	ptr = (uint8_t *)(block + 1);
+	block->mem = RTE_PTR_ALIGN_CEIL(ptr, pool->alignment);
+	block->size = size - (block->mem - ptr);
+
+	return block;
+}
+
+void *
+tb_alloc(struct tb_mem_pool *pool, size_t size)
+{
+	struct tb_mem_block *block;
+	void *ptr;
+	size_t new_sz;
+
+	size = RTE_ALIGN_CEIL(size, pool->alignment);
+
+	block = pool->block;
+	if (block == NULL || block->size < size) {
+		new_sz = (size > pool->min_alloc) ? size : pool->min_alloc;
+		block = tb_pool(pool, new_sz);
+	}
+	ptr = block->mem;
+	block->size -= size;
+	block->mem += size;
+	return ptr;
+}
+
+void
+tb_free_pool(struct tb_mem_pool *pool)
+{
+	struct tb_mem_block *next, *block;
+
+	for (block = pool->block; block != NULL; block = next) {
+		next = block->next;
+		free(block);
+	}
+	pool->block = NULL;
+	pool->alloc = 0;
+}
diff --git a/lib/librte_acl/tb_mem.h b/lib/librte_acl/tb_mem.h
new file mode 100644
index 00000000..ca7af966
--- /dev/null
+++ b/lib/librte_acl/tb_mem.h
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TB_MEM_H_
+#define _TB_MEM_H_
+
+/**
+ * @file
+ *
+ * RTE ACL temporary (build phase) memory management.
+ * Contains structures and functions to manage temporary (used by build only)
+ * memory. Memory allocated in large blocks to speed 'free' when trie is
+ * destructed (finish of build phase).
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_acl_osdep.h>
+#include <setjmp.h>
+
+struct tb_mem_block {
+	struct tb_mem_block *next;
+	struct tb_mem_pool  *pool;
+	size_t               size;
+	uint8_t             *mem;
+};
+
+struct tb_mem_pool {
+	struct tb_mem_block *block;
+	size_t               alignment;
+	size_t               min_alloc;
+	size_t               alloc;
+	/* jump target in case of memory allocation failure. */
+	sigjmp_buf           fail;
+};
+
+void *tb_alloc(struct tb_mem_pool *pool, size_t size);
+void tb_free_pool(struct tb_mem_pool *pool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TB_MEM_H_ */
diff --git a/lib/librte_cfgfile/Makefile b/lib/librte_cfgfile/Makefile
new file mode 100644
index 00000000..616aef09
--- /dev/null
+++ b/lib/librte_cfgfile/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_cfgfile.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_cfgfile_version.map
+
+LIBABIVER := 2
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_CFGFILE) += rte_cfgfile.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_CFGFILE)-include += rte_cfgfile.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c
new file mode 100644
index 00000000..75625a28
--- /dev/null
+++ b/lib/librte_cfgfile/rte_cfgfile.c
@@ -0,0 +1,374 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <rte_string_fns.h>
+
+#include "rte_cfgfile.h"
+
+struct rte_cfgfile_section {
+	char name[CFG_NAME_LEN];
+	int num_entries;
+	struct rte_cfgfile_entry *entries[0];
+};
+
+struct rte_cfgfile {
+	int flags;
+	int num_sections;
+	struct rte_cfgfile_section *sections[0];
+};
+
+/** when we resize a file structure, how many extra entries
+ * for new sections do we add in */
+#define CFG_ALLOC_SECTION_BATCH 8
+/** when we resize a section structure, how many extra entries
+ * for new entries do we add in */
+#define CFG_ALLOC_ENTRY_BATCH 16
+
+static unsigned
+_strip(char *str, unsigned len)
+{
+	int newlen = len;
+	if (len == 0)
+		return 0;
+
+	if (isspace(str[len-1])) {
+		/* strip trailing whitespace */
+		while (newlen > 0 && isspace(str[newlen - 1]))
+			str[--newlen] = '\0';
+	}
+
+	if (isspace(str[0])) {
+		/* strip leading whitespace */
+		int i, start = 1;
+		while (isspace(str[start]) && start < newlen)
+			start++
+			; /* do nothing */
+		newlen -= start;
+		for (i = 0; i < newlen; i++)
+			str[i] = str[i+start];
+		str[i] = '\0';
+	}
+	return newlen;
+}
+
+struct rte_cfgfile *
+rte_cfgfile_load(const char *filename, int flags)
+{
+	int allocated_sections = CFG_ALLOC_SECTION_BATCH;
+	int allocated_entries = 0;
+	int curr_section = -1;
+	int curr_entry = -1;
+	char buffer[256] = {0};
+	int lineno = 0;
+	struct rte_cfgfile *cfg = NULL;
+
+	FILE *f = fopen(filename, "r");
+	if (f == NULL)
+		return NULL;
+
+	cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) *
+		allocated_sections);
+	if (cfg == NULL)
+		goto error2;
+
+	memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections);
+
+	while (fgets(buffer, sizeof(buffer), f) != NULL) {
+		char *pos = NULL;
+		size_t len = strnlen(buffer, sizeof(buffer));
+		lineno++;
+		if ((len >= sizeof(buffer) - 1) && (buffer[len-1] != '\n')) {
+			printf("Error line %d - no \\n found on string. "
+					"Check if line too long\n", lineno);
+			goto error1;
+		}
+		pos = memchr(buffer, ';', sizeof(buffer));
+		if (pos != NULL) {
+			*pos = '\0';
+			len = pos -  buffer;
+		}
+
+		len = _strip(buffer, len);
+		if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL)
+			continue;
+
+		if (buffer[0] == '[') {
+			/* section heading line */
+			char *end = memchr(buffer, ']', len);
+			if (end == NULL) {
+				printf("Error line %d - no terminating '['"
+					"character found\n", lineno);
+				goto error1;
+			}
+			*end = '\0';
+			_strip(&buffer[1], end - &buffer[1]);
+
+			/* close off old section and add start new one */
+			if (curr_section >= 0)
+				cfg->sections[curr_section]->num_entries =
+					curr_entry + 1;
+			curr_section++;
+
+			/* resize overall struct if we don't have room for more
+			sections */
+			if (curr_section == allocated_sections) {
+				allocated_sections += CFG_ALLOC_SECTION_BATCH;
+				struct rte_cfgfile *n_cfg = realloc(cfg,
+					sizeof(*cfg) + sizeof(cfg->sections[0])
+					* allocated_sections);
+				if (n_cfg == NULL) {
+					printf("Error - no more memory\n");
+					goto error1;
+				}
+				cfg = n_cfg;
+			}
+
+			/* allocate space for new section */
+			allocated_entries = CFG_ALLOC_ENTRY_BATCH;
+			curr_entry = -1;
+			cfg->sections[curr_section] = malloc(
+				sizeof(*cfg->sections[0]) +
+				sizeof(cfg->sections[0]->entries[0]) *
+				allocated_entries);
+			if (cfg->sections[curr_section] == NULL) {
+				printf("Error - no more memory\n");
+				goto error1;
+			}
+
+			snprintf(cfg->sections[curr_section]->name,
+					sizeof(cfg->sections[0]->name),
+					"%s", &buffer[1]);
+		} else {
+			/* value line */
+			if (curr_section < 0) {
+				printf("Error line %d - value outside of"
+					"section\n", lineno);
+				goto error1;
+			}
+
+			struct rte_cfgfile_section *sect =
+				cfg->sections[curr_section];
+			char *split[2];
+			if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=')
+				!= 2) {
+				printf("Error at line %d - cannot split "
+					"string\n", lineno);
+				goto error1;
+			}
+
+			curr_entry++;
+			if (curr_entry == allocated_entries) {
+				allocated_entries += CFG_ALLOC_ENTRY_BATCH;
+				struct rte_cfgfile_section *n_sect = realloc(
+					sect, sizeof(*sect) +
+					sizeof(sect->entries[0]) *
+					allocated_entries);
+				if (n_sect == NULL) {
+					printf("Error - no more memory\n");
+					goto error1;
+				}
+				sect = cfg->sections[curr_section] = n_sect;
+			}
+
+			sect->entries[curr_entry] = malloc(
+				sizeof(*sect->entries[0]));
+			if (sect->entries[curr_entry] == NULL) {
+				printf("Error - no more memory\n");
+				goto error1;
+			}
+
+			struct rte_cfgfile_entry *entry = sect->entries[
+				curr_entry];
+			snprintf(entry->name, sizeof(entry->name), "%s",
+				split[0]);
+			snprintf(entry->value, sizeof(entry->value), "%s",
+				split[1]);
+			_strip(entry->name, strnlen(entry->name,
+				sizeof(entry->name)));
+			_strip(entry->value, strnlen(entry->value,
+				sizeof(entry->value)));
+		}
+	}
+	fclose(f);
+	cfg->flags = flags;
+	cfg->num_sections = curr_section + 1;
+	/* curr_section will still be -1 if we have an empty file */
+	if (curr_section >= 0)
+		cfg->sections[curr_section]->num_entries = curr_entry + 1;
+	return cfg;
+
+error1:
+	rte_cfgfile_close(cfg);
+error2:
+	fclose(f);
+	return NULL;
+}
+
+
+int rte_cfgfile_close(struct rte_cfgfile *cfg)
+{
+	int i, j;
+
+	if (cfg == NULL)
+		return -1;
+
+	for (i = 0; i < cfg->num_sections; i++) {
+		if (cfg->sections[i] != NULL) {
+			if (cfg->sections[i]->num_entries) {
+				for (j = 0; j < cfg->sections[i]->num_entries;
+					j++) {
+					if (cfg->sections[i]->entries[j] !=
+						NULL)
+						free(cfg->sections[i]->
+							entries[j]);
+				}
+			}
+			free(cfg->sections[i]);
+		}
+	}
+	free(cfg);
+
+	return 0;
+}
+
+int
+rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sectionname,
+size_t length)
+{
+	int i;
+	int num_sections = 0;
+	for (i = 0; i < cfg->num_sections; i++) {
+		if (strncmp(cfg->sections[i]->name, sectionname, length) == 0)
+			num_sections++;
+	}
+	return num_sections;
+}
+
+int
+rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[],
+	int max_sections)
+{
+	int i;
+
+	for (i = 0; i < cfg->num_sections && i < max_sections; i++)
+		snprintf(sections[i], CFG_NAME_LEN, "%s",
+		cfg->sections[i]->name);
+
+	return i;
+}
+
+static const struct rte_cfgfile_section *
+_get_section(struct rte_cfgfile *cfg, const char *sectionname)
+{
+	int i;
+	for (i = 0; i < cfg->num_sections; i++) {
+		if (strncmp(cfg->sections[i]->name, sectionname,
+				sizeof(cfg->sections[0]->name)) == 0)
+			return cfg->sections[i];
+	}
+	return NULL;
+}
+
+int
+rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname)
+{
+	return _get_section(cfg, sectionname) != NULL;
+}
+
+int
+rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
+	const char *sectionname)
+{
+	const struct rte_cfgfile_section *s = _get_section(cfg, sectionname);
+	if (s == NULL)
+		return -1;
+	return s->num_entries;
+}
+
+
+int
+rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname,
+		struct rte_cfgfile_entry *entries, int max_entries)
+{
+	int i;
+	const struct rte_cfgfile_section *sect = _get_section(cfg, sectionname);
+	if (sect == NULL)
+		return -1;
+	for (i = 0; i < max_entries && i < sect->num_entries; i++)
+		entries[i] = *sect->entries[i];
+	return i;
+}
+
+int
+rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index,
+		char *sectionname,
+		struct rte_cfgfile_entry *entries, int max_entries)
+{
+	int i;
+	const struct rte_cfgfile_section *sect;
+
+	if (index < 0 || index >= cfg->num_sections)
+		return -1;
+
+	sect = cfg->sections[index];
+	snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name);
+	for (i = 0; i < max_entries && i < sect->num_entries; i++)
+		entries[i] = *sect->entries[i];
+	return i;
+}
+
+const char *
+rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname,
+		const char *entryname)
+{
+	int i;
+	const struct rte_cfgfile_section *sect = _get_section(cfg, sectionname);
+	if (sect == NULL)
+		return NULL;
+	for (i = 0; i < sect->num_entries; i++)
+		if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN)
+			== 0)
+			return sect->entries[i]->value;
+	return NULL;
+}
+
+int
+rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname,
+		const char *entryname)
+{
+	return rte_cfgfile_get_entry(cfg, sectionname, entryname) != NULL;
+}
diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h
new file mode 100644
index 00000000..834f8287
--- /dev/null
+++ b/lib/librte_cfgfile/rte_cfgfile.h
@@ -0,0 +1,239 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_CFGFILE_H__
+#define __INCLUDE_RTE_CFGFILE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+* @file
+* RTE Configuration File
+*
+* This library allows reading application defined parameters from standard
+* format configuration file.
+*
+***/
+
+#ifndef CFG_NAME_LEN
+#define CFG_NAME_LEN 64
+#endif
+
+#ifndef CFG_VALUE_LEN
+#define CFG_VALUE_LEN 256
+#endif
+
+/** Configuration file */
+struct rte_cfgfile;
+
+/** Configuration file entry */
+struct rte_cfgfile_entry {
+	char name[CFG_NAME_LEN]; /**< Name */
+	char value[CFG_VALUE_LEN]; /**< Value */
+};
+
+/**
+* Open config file
+*
+* @param filename
+*   Config file name
+* @param flags
+*   Config file flags, Reserved for future use. Must be set to 0.
+* @return
+*   Handle to configuration file
+*/
+struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags);
+
+/**
+* Get number of sections in config file
+*
+* @param cfg
+*   Config file
+* @param sec_name
+*   Section name
+* @param length
+*   Maximum section name length
+* @return
+*   0 on success, error code otherwise
+*/
+int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
+	size_t length);
+
+/**
+* Get name of all config file sections.
+*
+* Fills in the array sections with the name of all the sections in the file
+* (up to the number of max_sections sections).
+*
+* @param cfg
+*   Config file
+* @param sections
+*   Array containing section names after successful invocation. Each elemen
+*   of this array should be preallocated by the user with at least
+*   CFG_NAME_LEN characters.
+* @param max_sections
+*   Maximum number of section names to be stored in sections array
+* @return
+*   0 on success, error code otherwise
+*/
+int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[],
+	int max_sections);
+
+/**
+* Check if given section exists in config file
+*
+* @param cfg
+*   Config file
+* @param sectionname
+*   Section name
+* @return
+*   TRUE (value different than 0) if section exists, FALSE (value 0) otherwise
+*/
+int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname);
+
+/**
+* Get number of entries in given config file section
+*
+* If multiple sections have the given name this function operates on the
+* first one.
+*
+* @param cfg
+*   Config file
+* @param sectionname
+*   Section name
+* @return
+*   Number of entries in section
+*/
+int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
+	const char *sectionname);
+
+/** Get section entries as key-value pairs
+*
+* If multiple sections have the given name this function operates on the
+* first one.
+*
+* @param cfg
+*   Config file
+* @param sectionname
+*   Section name
+* @param entries
+*   Pre-allocated array of at least max_entries entries where the section
+*   entries are stored as key-value pair after successful invocation
+* @param max_entries
+*   Maximum number of section entries to be stored in entries array
+* @return
+*   0 on success, error code otherwise
+*/
+int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
+	const char *sectionname,
+	struct rte_cfgfile_entry *entries,
+	int max_entries);
+
+/** Get section entries as key-value pairs
+*
+* The index of a section is the same as the index of its name in the
+* result of rte_cfgfile_sections. This API can be used when there are
+* multiple sections with the same name.
+*
+* @param cfg
+*   Config file
+* @param index
+*   Section index
+* @param sectionname
+*   Pre-allocated string of at least CFG_NAME_LEN characters where the
+*   section name is stored after successful invocation.
+* @param entries
+*   Pre-allocated array of at least max_entries entries where the section
+*   entries are stored as key-value pair after successful invocation
+* @param max_entries
+*   Maximum number of section entries to be stored in entries array
+* @return
+*   Number of entries populated on success, negative error code otherwise
+*/
+int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
+	int index,
+	char *sectionname,
+	struct rte_cfgfile_entry *entries,
+	int max_entries);
+
+/** Get value of the named entry in named config file section
+*
+* If multiple sections have the given name this function operates on the
+* first one.
+*
+* @param cfg
+*   Config file
+* @param sectionname
+*   Section name
+* @param entryname
+*   Entry name
+* @return
+*   Entry value
+*/
+const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
+	const char *sectionname,
+	const char *entryname);
+
+/** Check if given entry exists in named config file section
+*
+* If multiple sections have the given name this function operates on the
+* first one.
+*
+* @param cfg
+*   Config file
+* @param sectionname
+*   Section name
+* @param entryname
+*   Entry name
+* @return
+*   TRUE (value different than 0) if entry exists, FALSE (value 0) otherwise
+*/
+int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname,
+	const char *entryname);
+
+/** Close config file
+*
+* @param cfg
+*   Config file
+* @return
+*   0 on success, error code otherwise
+*/
+int rte_cfgfile_close(struct rte_cfgfile *cfg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_cfgfile/rte_cfgfile_version.map b/lib/librte_cfgfile/rte_cfgfile_version.map
new file mode 100644
index 00000000..3c2f0dbf
--- /dev/null
+++ b/lib/librte_cfgfile/rte_cfgfile_version.map
@@ -0,0 +1,22 @@
+DPDK_2.0 {
+	global:
+
+	rte_cfgfile_close;
+	rte_cfgfile_get_entry;
+	rte_cfgfile_has_entry;
+	rte_cfgfile_has_section;
+	rte_cfgfile_load;
+	rte_cfgfile_num_sections;
+	rte_cfgfile_section_entries;
+	rte_cfgfile_section_num_entries;
+	rte_cfgfile_sections;
+
+	local: *;
+};
+
+DPDK_16.04 {
+	global:
+
+	rte_cfgfile_section_entries_by_index;
+
+} DPDK_2.0;
diff --git a/lib/librte_cmdline/Makefile b/lib/librte_cmdline/Makefile
new file mode 100644
index 00000000..7d2d148c
--- /dev/null
+++ b/lib/librte_cmdline/Makefile
@@ -0,0 +1,67 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_cmdline.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_cmdline_version.map
+
+LIBABIVER := 2
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) := cmdline.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_cirbuf.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_etheraddr.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_ipaddr.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_num.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_string.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_rdline.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_vt100.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_socket.c
+SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_portlist.c
+
+CFLAGS += -D_GNU_SOURCE
+
+# install includes
+INCS := cmdline.h cmdline_parse.h cmdline_parse_num.h cmdline_parse_ipaddr.h
+INCS += cmdline_parse_etheraddr.h cmdline_parse_string.h cmdline_rdline.h
+INCS += cmdline_vt100.h cmdline_socket.h cmdline_cirbuf.h cmdline_parse_portlist.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_CMDLINE)-include := $(INCS)
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c
new file mode 100644
index 00000000..c405878e
--- /dev/null
+++ b/lib/librte_cmdline/cmdline.c
@@ -0,0 +1,301 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <errno.h>
+#include <termios.h>
+#include <netinet/in.h>
+
+#include <rte_string_fns.h>
+
+#include "cmdline_parse.h"
+#include "cmdline_rdline.h"
+#include "cmdline.h"
+
+static void
+cmdline_valid_buffer(struct rdline *rdl, const char *buf,
+		     __attribute__((unused)) unsigned int size)
+{
+	struct cmdline *cl = rdl->opaque;
+	int ret;
+	ret = cmdline_parse(cl, buf);
+	if (ret == CMDLINE_PARSE_AMBIGUOUS)
+		cmdline_printf(cl, "Ambiguous command\n");
+	else if (ret == CMDLINE_PARSE_NOMATCH)
+		cmdline_printf(cl, "Command not found\n");
+	else if (ret == CMDLINE_PARSE_BAD_ARGS)
+		cmdline_printf(cl, "Bad arguments\n");
+}
+
+static int
+cmdline_complete_buffer(struct rdline *rdl, const char *buf,
+			char *dstbuf, unsigned int dstsize,
+			int *state)
+{
+	struct cmdline *cl = rdl->opaque;
+	return cmdline_complete(cl, buf, state, dstbuf, dstsize);
+}
+
+int
+cmdline_write_char(struct rdline *rdl, char c)
+{
+	int ret = -1;
+	struct cmdline *cl;
+
+	if (!rdl)
+		return -1;
+
+	cl = rdl->opaque;
+
+	if (cl->s_out >= 0)
+		ret = write(cl->s_out, &c, 1);
+
+	return ret;
+}
+
+
+void
+cmdline_set_prompt(struct cmdline *cl, const char *prompt)
+{
+	if (!cl || !prompt)
+		return;
+	snprintf(cl->prompt, sizeof(cl->prompt), "%s", prompt);
+}
+
+struct cmdline *
+cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int s_in, int s_out)
+{
+	struct cmdline *cl;
+
+	if (!ctx || !prompt)
+		return NULL;
+
+	cl = malloc(sizeof(struct cmdline));
+	if (cl == NULL)
+		return NULL;
+	memset(cl, 0, sizeof(struct cmdline));
+	cl->s_in = s_in;
+	cl->s_out = s_out;
+	cl->ctx = ctx;
+
+	rdline_init(&cl->rdl, cmdline_write_char,
+		    cmdline_valid_buffer, cmdline_complete_buffer);
+	cl->rdl.opaque = cl;
+	cmdline_set_prompt(cl, prompt);
+	rdline_newline(&cl->rdl, cl->prompt);
+
+	return cl;
+}
+
+void
+cmdline_free(struct cmdline *cl)
+{
+	dprintf("called\n");
+
+	if (!cl)
+		return;
+
+	if (cl->s_in > 2)
+		close(cl->s_in);
+	if (cl->s_out != cl->s_in && cl->s_out > 2)
+		close(cl->s_out);
+	free(cl);
+}
+
+void
+cmdline_printf(const struct cmdline *cl, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!cl || !fmt)
+		return;
+
+#ifdef _GNU_SOURCE
+	if (cl->s_out < 0)
+		return;
+	va_start(ap, fmt);
+	vdprintf(cl->s_out, fmt, ap);
+	va_end(ap);
+#else
+	int ret;
+	char *buf;
+
+	if (cl->s_out < 0)
+		return;
+
+	buf = malloc(BUFSIZ);
+	if (buf == NULL)
+		return;
+	va_start(ap, fmt);
+	ret = vsnprintf(buf, BUFSIZ, fmt, ap);
+	va_end(ap);
+	if (ret < 0) {
+		free(buf);
+		return;
+	}
+	if (ret >= BUFSIZ)
+		ret = BUFSIZ - 1;
+	write(cl->s_out, buf, ret);
+	free(buf);
+#endif
+}
+
+int
+cmdline_in(struct cmdline *cl, const char *buf, int size)
+{
+	const char *history, *buffer;
+	size_t histlen, buflen;
+	int ret = 0;
+	int i, same;
+
+	if (!cl || !buf)
+		return -1;
+
+	for (i=0; i<size; i++) {
+		ret = rdline_char_in(&cl->rdl, buf[i]);
+
+		if (ret == RDLINE_RES_VALIDATED) {
+			buffer = rdline_get_buffer(&cl->rdl);
+			history = rdline_get_history_item(&cl->rdl, 0);
+			if (history) {
+				histlen = strnlen(history, RDLINE_BUF_SIZE);
+				same = !memcmp(buffer, history, histlen) &&
+					buffer[histlen] == '\n';
+			}
+			else
+				same = 0;
+			buflen = strnlen(buffer, RDLINE_BUF_SIZE);
+			if (buflen > 1 && !same)
+				rdline_add_history(&cl->rdl, buffer);
+			rdline_newline(&cl->rdl, cl->prompt);
+		}
+		else if (ret == RDLINE_RES_EOF)
+			return -1;
+		else if (ret == RDLINE_RES_EXITED)
+			return -1;
+	}
+	return i;
+}
+
+void
+cmdline_quit(struct cmdline *cl)
+{
+	if (!cl)
+		return;
+	rdline_quit(&cl->rdl);
+}
+
+int
+cmdline_poll(struct cmdline *cl)
+{
+	struct pollfd pfd;
+	int status;
+	ssize_t read_status;
+	char c;
+
+	if (!cl)
+		return -EINVAL;
+	else if (cl->rdl.status == RDLINE_EXITED)
+		return RDLINE_EXITED;
+
+	pfd.fd = cl->s_in;
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+
+	status = poll(&pfd, 1, 0);
+	if (status < 0)
+		return status;
+	else if (status > 0) {
+		c = -1;
+		read_status = read(cl->s_in, &c, 1);
+		if (read_status < 0)
+			return read_status;
+
+		status = cmdline_in(cl, &c, 1);
+		if (status < 0 && cl->rdl.status != RDLINE_EXITED)
+			return status;
+	}
+
+	return cl->rdl.status;
+}
+
+void
+cmdline_interact(struct cmdline *cl)
+{
+	char c;
+
+	if (!cl)
+		return;
+
+	c = -1;
+	while (1) {
+		if (read(cl->s_in, &c, 1) <= 0)
+			break;
+		if (cmdline_in(cl, &c, 1) < 0)
+			break;
+	}
+}
diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h
new file mode 100644
index 00000000..2578ca81
--- /dev/null
+++ b/lib/librte_cmdline/cmdline.h
@@ -0,0 +1,115 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CMDLINE_H_
+#define _CMDLINE_H_
+
+#include <termios.h>
+#include <cmdline_rdline.h>
+
+/**
+ * @file
+ *
+ * Command line API
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cmdline {
+	int s_in;
+	int s_out;
+	cmdline_parse_ctx_t *ctx;
+	struct rdline rdl;
+	char prompt[RDLINE_PROMPT_SIZE];
+	struct termios oldterm;
+};
+
+struct cmdline *cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int s_in, int s_out);
+void cmdline_set_prompt(struct cmdline *cl, const char *prompt);
+void cmdline_free(struct cmdline *cl);
+void cmdline_printf(const struct cmdline *cl, const char *fmt, ...)
+	__attribute__((format(printf,2,3)));
+int cmdline_in(struct cmdline *cl, const char *buf, int size);
+int cmdline_write_char(struct rdline *rdl, char c);
+
+/**
+ * This function is nonblocking equivalent of ``cmdline_interact()``. It polls
+ * *cl* for one character and interpret it. If return value is *RDLINE_EXITED*
+ * it mean that ``cmdline_quit()`` was invoked.
+ *
+ * @param cl
+ *   The command line object.
+ *
+ * @return
+ *   On success return object status - one of *enum rdline_status*.
+ *   On error return negative value.
+ */
+int cmdline_poll(struct cmdline *cl);
+
+void cmdline_interact(struct cmdline *cl);
+void cmdline_quit(struct cmdline *cl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CMDLINE_SOCKET_H_ */
diff --git a/lib/librte_cmdline/cmdline_cirbuf.c b/lib/librte_cmdline/cmdline_cirbuf.c
new file mode 100644
index 00000000..f506f880
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_cirbuf.c
@@ -0,0 +1,466 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "cmdline_cirbuf.h"
+
+
+int
+cirbuf_init(struct cirbuf *cbuf, char *buf, unsigned int start, unsigned int maxlen)
+{
+	if (!cbuf || !buf)
+		return -EINVAL;
+	cbuf->maxlen = maxlen;
+	cbuf->len = 0;
+	cbuf->start = start;
+	cbuf->end = start;
+	cbuf->buf = buf;
+	return 0;
+}
+
+/* multiple add */
+
+int
+cirbuf_add_buf_head(struct cirbuf *cbuf, const char *c, unsigned int n)
+{
+	unsigned int e;
+
+	if (!cbuf || !c || !n || n > CIRBUF_GET_FREELEN(cbuf))
+		return -EINVAL;
+
+	e = CIRBUF_IS_EMPTY(cbuf) ? 1 : 0;
+
+	if (n < cbuf->start + e) {
+		dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->start - n + e, n);
+		memcpy(cbuf->buf + cbuf->start - n + e, c, n);
+	}
+	else {
+		dprintf("s[%d] -> d[%d] (%d)\n", + n - (cbuf->start + e), 0,
+			cbuf->start + e);
+		dprintf("s[%d] -> d[%d] (%d)\n", cbuf->maxlen - n +
+			(cbuf->start + e), 0, n - (cbuf->start + e));
+		memcpy(cbuf->buf, c  + n - (cbuf->start + e) , cbuf->start + e);
+		memcpy(cbuf->buf + cbuf->maxlen - n + (cbuf->start + e), c,
+		       n - (cbuf->start + e));
+	}
+	cbuf->len += n;
+	cbuf->start += (cbuf->maxlen - n + e);
+	cbuf->start %= cbuf->maxlen;
+	return n;
+}
+
+/* multiple add */
+
+int
+cirbuf_add_buf_tail(struct cirbuf *cbuf, const char *c, unsigned int n)
+{
+	unsigned int e;
+
+	if (!cbuf || !c || !n || n > CIRBUF_GET_FREELEN(cbuf))
+		return -EINVAL;
+
+	e = CIRBUF_IS_EMPTY(cbuf) ? 1 : 0;
+
+	if (n < cbuf->maxlen - cbuf->end - 1 + e) {
+		dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->end + !e, n);
+		memcpy(cbuf->buf + cbuf->end + !e, c, n);
+	}
+	else {
+		dprintf("s[%d] -> d[%d] (%d)\n", cbuf->end + !e, 0,
+			cbuf->maxlen - cbuf->end - 1 + e);
+		dprintf("s[%d] -> d[%d] (%d)\n", cbuf->maxlen - cbuf->end - 1 +
+			e, 0, n - cbuf->maxlen + cbuf->end + 1 - e);
+		memcpy(cbuf->buf + cbuf->end + !e, c, cbuf->maxlen -
+		       cbuf->end - 1 + e);
+		memcpy(cbuf->buf, c + cbuf->maxlen - cbuf->end - 1 + e,
+		       n - cbuf->maxlen + cbuf->end + 1 - e);
+	}
+	cbuf->len += n;
+	cbuf->end += n - e;
+	cbuf->end %= cbuf->maxlen;
+	return n;
+}
+
+/* add at head */
+
+static inline void
+__cirbuf_add_head(struct cirbuf * cbuf, char c)
+{
+	if (!CIRBUF_IS_EMPTY(cbuf)) {
+		cbuf->start += (cbuf->maxlen - 1);
+		cbuf->start %= cbuf->maxlen;
+	}
+	cbuf->buf[cbuf->start] = c;
+	cbuf->len ++;
+}
+
+int
+cirbuf_add_head_safe(struct cirbuf * cbuf, char c)
+{
+	if (cbuf && !CIRBUF_IS_FULL(cbuf)) {
+		__cirbuf_add_head(cbuf, c);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+void
+cirbuf_add_head(struct cirbuf * cbuf, char c)
+{
+	__cirbuf_add_head(cbuf, c);
+}
+
+/* add at tail */
+
+static inline void
+__cirbuf_add_tail(struct cirbuf * cbuf, char c)
+{
+	if (!CIRBUF_IS_EMPTY(cbuf)) {
+		cbuf->end ++;
+		cbuf->end %= cbuf->maxlen;
+	}
+	cbuf->buf[cbuf->end] = c;
+	cbuf->len ++;
+}
+
+int
+cirbuf_add_tail_safe(struct cirbuf * cbuf, char c)
+{
+	if (cbuf && !CIRBUF_IS_FULL(cbuf)) {
+		__cirbuf_add_tail(cbuf, c);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+void
+cirbuf_add_tail(struct cirbuf * cbuf, char c)
+{
+	__cirbuf_add_tail(cbuf, c);
+}
+
+
+static inline void
+__cirbuf_shift_left(struct cirbuf *cbuf)
+{
+	unsigned int i;
+	char tmp = cbuf->buf[cbuf->start];
+
+	for (i=0 ; i<cbuf->len ; i++) {
+		cbuf->buf[(cbuf->start+i)%cbuf->maxlen] =
+			cbuf->buf[(cbuf->start+i+1)%cbuf->maxlen];
+	}
+	cbuf->buf[(cbuf->start-1+cbuf->maxlen)%cbuf->maxlen] = tmp;
+	cbuf->start += (cbuf->maxlen - 1);
+	cbuf->start %= cbuf->maxlen;
+	cbuf->end += (cbuf->maxlen - 1);
+	cbuf->end %= cbuf->maxlen;
+}
+
+static inline void
+__cirbuf_shift_right(struct cirbuf *cbuf)
+{
+	unsigned int i;
+	char tmp = cbuf->buf[cbuf->end];
+
+	for (i=0 ; i<cbuf->len ; i++) {
+		cbuf->buf[(cbuf->end+cbuf->maxlen-i)%cbuf->maxlen] =
+			cbuf->buf[(cbuf->end+cbuf->maxlen-i-1)%cbuf->maxlen];
+	}
+	cbuf->buf[(cbuf->end+1)%cbuf->maxlen] = tmp;
+	cbuf->start += 1;
+	cbuf->start %= cbuf->maxlen;
+	cbuf->end += 1;
+	cbuf->end %= cbuf->maxlen;
+}
+
+/* XXX we could do a better algorithm here... */
+int
+cirbuf_align_left(struct cirbuf * cbuf)
+{
+	if (!cbuf)
+		return -EINVAL;
+
+	if (cbuf->start < cbuf->maxlen/2) {
+		while (cbuf->start != 0) {
+			__cirbuf_shift_left(cbuf);
+		}
+	}
+	else {
+		while (cbuf->start != 0) {
+			__cirbuf_shift_right(cbuf);
+		}
+	}
+
+	return 0;
+}
+
+/* XXX we could do a better algorithm here... */
+int
+cirbuf_align_right(struct cirbuf * cbuf)
+{
+	if (!cbuf)
+		return -EINVAL;
+
+	if (cbuf->start >= cbuf->maxlen/2) {
+		while (cbuf->end != cbuf->maxlen-1) {
+			__cirbuf_shift_left(cbuf);
+		}
+	}
+	else {
+		while (cbuf->start != cbuf->maxlen-1) {
+			__cirbuf_shift_right(cbuf);
+		}
+	}
+
+	return 0;
+}
+
+/* buffer del */
+
+int
+cirbuf_del_buf_head(struct cirbuf *cbuf, unsigned int size)
+{
+	if (!cbuf || !size || size > CIRBUF_GET_LEN(cbuf))
+		return -EINVAL;
+
+	cbuf->len -= size;
+	if (CIRBUF_IS_EMPTY(cbuf)) {
+		cbuf->start += size - 1;
+		cbuf->start %= cbuf->maxlen;
+	}
+	else {
+		cbuf->start += size;
+		cbuf->start %= cbuf->maxlen;
+	}
+	return 0;
+}
+
+/* buffer del */
+
+int
+cirbuf_del_buf_tail(struct cirbuf *cbuf, unsigned int size)
+{
+	if (!cbuf || !size || size > CIRBUF_GET_LEN(cbuf))
+		return -EINVAL;
+
+	cbuf->len -= size;
+	if (CIRBUF_IS_EMPTY(cbuf)) {
+		cbuf->end  += (cbuf->maxlen - size + 1);
+		cbuf->end %= cbuf->maxlen;
+	}
+	else {
+		cbuf->end  += (cbuf->maxlen - size);
+		cbuf->end %= cbuf->maxlen;
+	}
+	return 0;
+}
+
+/* del at head */
+
+static inline void
+__cirbuf_del_head(struct cirbuf * cbuf)
+{
+	cbuf->len --;
+	if (!CIRBUF_IS_EMPTY(cbuf)) {
+		cbuf->start ++;
+		cbuf->start %= cbuf->maxlen;
+	}
+}
+
+int
+cirbuf_del_head_safe(struct cirbuf * cbuf)
+{
+	if (cbuf && !CIRBUF_IS_EMPTY(cbuf)) {
+		__cirbuf_del_head(cbuf);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+void
+cirbuf_del_head(struct cirbuf * cbuf)
+{
+	__cirbuf_del_head(cbuf);
+}
+
+/* del at tail */
+
+static inline void
+__cirbuf_del_tail(struct cirbuf * cbuf)
+{
+	cbuf->len --;
+	if (!CIRBUF_IS_EMPTY(cbuf)) {
+		cbuf->end  += (cbuf->maxlen - 1);
+		cbuf->end %= cbuf->maxlen;
+	}
+}
+
+int
+cirbuf_del_tail_safe(struct cirbuf * cbuf)
+{
+	if (cbuf && !CIRBUF_IS_EMPTY(cbuf)) {
+		__cirbuf_del_tail(cbuf);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+void
+cirbuf_del_tail(struct cirbuf * cbuf)
+{
+	__cirbuf_del_tail(cbuf);
+}
+
+/* convert to buffer */
+
+int
+cirbuf_get_buf_head(struct cirbuf *cbuf, char *c, unsigned int size)
+{
+	unsigned int n;
+
+	if (!cbuf || !c)
+		return -EINVAL;
+
+	n = (size < CIRBUF_GET_LEN(cbuf)) ? size : CIRBUF_GET_LEN(cbuf);
+
+	if (!n)
+		return 0;
+
+	if (cbuf->start <= cbuf->end) {
+		dprintf("s[%d] -> d[%d] (%d)\n", cbuf->start, 0, n);
+		memcpy(c, cbuf->buf + cbuf->start , n);
+	}
+	else {
+		/* check if we need to go from end to the beginning */
+		if (n <= cbuf->maxlen - cbuf->start) {
+			dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->start, n);
+			memcpy(c, cbuf->buf + cbuf->start , n);
+		}
+		else {
+			dprintf("s[%d] -> d[%d] (%d)\n", cbuf->start, 0,
+				cbuf->maxlen - cbuf->start);
+			dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->maxlen - cbuf->start,
+				n - cbuf->maxlen + cbuf->start);
+			memcpy(c, cbuf->buf + cbuf->start , cbuf->maxlen - cbuf->start);
+			memcpy(c + cbuf->maxlen - cbuf->start, cbuf->buf,
+				   n - cbuf->maxlen + cbuf->start);
+		}
+	}
+	return n;
+}
+
+/* convert to buffer */
+
+int
+cirbuf_get_buf_tail(struct cirbuf *cbuf, char *c, unsigned int size)
+{
+	unsigned int n;
+
+	if (!cbuf || !c)
+		return -EINVAL;
+
+	n = (size < CIRBUF_GET_LEN(cbuf)) ? size : CIRBUF_GET_LEN(cbuf);
+
+	if (!n)
+		return 0;
+
+	if (cbuf->start <= cbuf->end) {
+		dprintf("s[%d] -> d[%d] (%d)\n", cbuf->end - n + 1, 0, n);
+		memcpy(c, cbuf->buf + cbuf->end - n + 1, n);
+	}
+	else {
+		/* check if we need to go from end to the beginning */
+		if (n <= cbuf->end + 1) {
+			dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->end - n + 1, n);
+			memcpy(c, cbuf->buf + cbuf->end - n + 1, n);
+		}
+		else {
+			dprintf("s[%d] -> d[%d] (%d)\n", 0,
+				cbuf->maxlen - cbuf->start, cbuf->end + 1);
+			dprintf("s[%d] -> d[%d] (%d)\n",
+				cbuf->maxlen - n + cbuf->end + 1, 0, n - cbuf->end - 1);
+			memcpy(c + cbuf->maxlen - cbuf->start,
+					       cbuf->buf, cbuf->end + 1);
+			memcpy(c, cbuf->buf + cbuf->maxlen - n + cbuf->end +1,
+				   n - cbuf->end - 1);
+		}
+	}
+	return n;
+}
+
+/* get head or get tail */
+
+char
+cirbuf_get_head(struct cirbuf * cbuf)
+{
+	return cbuf->buf[cbuf->start];
+}
+
+/* get head or get tail */
+
+char
+cirbuf_get_tail(struct cirbuf * cbuf)
+{
+	return cbuf->buf[cbuf->end];
+}
diff --git a/lib/librte_cmdline/cmdline_cirbuf.h b/lib/librte_cmdline/cmdline_cirbuf.h
new file mode 100644
index 00000000..6321dec5
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_cirbuf.h
@@ -0,0 +1,245 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CIRBUF_H_
+#define _CIRBUF_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * This structure is the header of a cirbuf type.
+ */
+struct cirbuf {
+	unsigned int maxlen;    /**< total len of the fifo (number of elements) */
+	unsigned int start;     /**< indice of the first elt */
+	unsigned int end;       /**< indice of the last elt */
+	unsigned int len;       /**< current len of fifo */
+	char *buf;
+};
+
+#ifdef RTE_LIBRTE_CMDLINE_DEBUG
+#define dprintf_(fmt, ...) printf("line %3.3d - " fmt "%.0s", __LINE__, __VA_ARGS__)
+#define dprintf(...) dprintf_(__VA_ARGS__, "dummy")
+#else
+#define dprintf(...) (void)0
+#endif
+
+
+/**
+ * Init the circular buffer
+ */
+int cirbuf_init(struct cirbuf *cbuf, char *buf, unsigned int start, unsigned int maxlen);
+
+
+/**
+ * Return 1 if the circular buffer is full
+ */
+#define CIRBUF_IS_FULL(cirbuf) ((cirbuf)->maxlen == (cirbuf)->len)
+
+/**
+ * Return 1 if the circular buffer is empty
+ */
+#define CIRBUF_IS_EMPTY(cirbuf) ((cirbuf)->len == 0)
+
+/**
+ * return current size of the circular buffer (number of used elements)
+ */
+#define CIRBUF_GET_LEN(cirbuf) ((cirbuf)->len)
+
+/**
+ * return size of the circular buffer (used + free elements)
+ */
+#define CIRBUF_GET_MAXLEN(cirbuf) ((cirbuf)->maxlen)
+
+/**
+ * return the number of free elts
+ */
+#define CIRBUF_GET_FREELEN(cirbuf) ((cirbuf)->maxlen - (cirbuf)->len)
+
+/**
+ * Iterator for a circular buffer
+ *   c: struct cirbuf pointer
+ *   i: an integer type internally used in the macro
+ *   e: char that takes the value for each iteration
+ */
+#define CIRBUF_FOREACH(c, i, e)                                 \
+	for ( i=0, e=(c)->buf[(c)->start] ;                     \
+		i<((c)->len) ;                                  \
+		i ++,  e=(c)->buf[((c)->start+i)%((c)->maxlen)])
+
+
+/**
+ * Add a character at head of the circular buffer. Return 0 on success, or
+ * a negative value on error.
+ */
+int cirbuf_add_head_safe(struct cirbuf *cbuf, char c);
+
+/**
+ * Add a character at head of the circular buffer. You _must_ check that you
+ * have enough free space in the buffer before calling this func.
+ */
+void cirbuf_add_head(struct cirbuf *cbuf, char c);
+
+/**
+ * Add a character at tail of the circular buffer. Return 0 on success, or
+ * a negative value on error.
+ */
+int cirbuf_add_tail_safe(struct cirbuf *cbuf, char c);
+
+/**
+ * Add a character at tail of the circular buffer. You _must_ check that you
+ * have enough free space in the buffer before calling this func.
+ */
+void cirbuf_add_tail(struct cirbuf *cbuf, char c);
+
+/**
+ * Remove a char at the head of the circular buffer. Return 0 on
+ * success, or a negative value on error.
+ */
+int cirbuf_del_head_safe(struct cirbuf *cbuf);
+
+/**
+ * Remove a char at the head of the circular buffer. You _must_ check
+ * that buffer is not empty before calling the function.
+ */
+void cirbuf_del_head(struct cirbuf *cbuf);
+
+/**
+ * Remove a char at the tail of the circular buffer. Return 0 on
+ * success, or a negative value on error.
+ */
+int cirbuf_del_tail_safe(struct cirbuf *cbuf);
+
+/**
+ * Remove a char at the tail of the circular buffer. You _must_ check
+ * that buffer is not empty before calling the function.
+ */
+void cirbuf_del_tail(struct cirbuf *cbuf);
+
+/**
+ * Return the head of the circular buffer. You _must_ check that
+ * buffer is not empty before calling the function.
+ */
+char cirbuf_get_head(struct cirbuf *cbuf);
+
+/**
+ * Return the tail of the circular buffer. You _must_ check that
+ * buffer is not empty before calling the function.
+ */
+char cirbuf_get_tail(struct cirbuf *cbuf);
+
+/**
+ * Add a buffer at head of the circular buffer. 'c' is a pointer to a
+ * buffer, and n is the number of char to add. Return the number of
+ * copied bytes on success, or a negative value on error.
+ */
+int cirbuf_add_buf_head(struct cirbuf *cbuf, const char *c, unsigned int n);
+
+/**
+ * Add a buffer at tail of the circular buffer. 'c' is a pointer to a
+ * buffer, and n is the number of char to add. Return the number of
+ * copied bytes on success, or a negative value on error.
+ */
+int cirbuf_add_buf_tail(struct cirbuf *cbuf, const char *c, unsigned int n);
+
+/**
+ * Remove chars at the head of the circular buffer. Return 0 on
+ * success, or a negative value on error.
+ */
+int cirbuf_del_buf_head(struct cirbuf *cbuf, unsigned int size);
+
+/**
+ * Remove chars at the tail of the circular buffer. Return 0 on
+ * success, or a negative value on error.
+ */
+int cirbuf_del_buf_tail(struct cirbuf *cbuf, unsigned int size);
+
+/**
+ * Copy a maximum of 'size' characters from the head of the circular
+ * buffer to a flat one pointed by 'c'. Return the number of copied
+ * chars.
+ */
+int cirbuf_get_buf_head(struct cirbuf *cbuf, char *c, unsigned int size);
+
+/**
+ * Copy a maximum of 'size' characters from the tail of the circular
+ * buffer to a flat one pointed by 'c'. Return the number of copied
+ * chars.
+ */
+int cirbuf_get_buf_tail(struct cirbuf *cbuf, char *c, unsigned int size);
+
+
+/**
+ * Set the start of the data to the index 0 of the internal buffer.
+ */
+int cirbuf_align_left(struct cirbuf *cbuf);
+
+/**
+ * Set the end of the data to the last index of the internal buffer.
+ */
+int cirbuf_align_right(struct cirbuf *cbuf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CIRBUF_H_ */
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
new file mode 100644
index 00000000..24a6ed67
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -0,0 +1,563 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <string.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <termios.h>
+
+#include <netinet/in.h>
+
+#include <rte_string_fns.h>
+
+#include "cmdline_rdline.h"
+#include "cmdline_parse.h"
+#include "cmdline.h"
+
+#ifdef RTE_LIBRTE_CMDLINE_DEBUG
+#define debug_printf printf
+#else
+#define debug_printf(args...) do {} while(0)
+#endif
+
+#define CMDLINE_BUFFER_SIZE 64
+
+/* isblank() needs _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE, so use our
+ * own. */
+static int
+isblank2(char c)
+{
+	if (c == ' ' ||
+	    c == '\t' )
+		return 1;
+	return 0;
+}
+
+static int
+isendofline(char c)
+{
+	if (c == '\n' ||
+	    c == '\r' )
+		return 1;
+	return 0;
+}
+
+static int
+iscomment(char c)
+{
+	if (c == '#')
+		return 1;
+	return 0;
+}
+
+int
+cmdline_isendoftoken(char c)
+{
+	if (!c || iscomment(c) || isblank2(c) || isendofline(c))
+		return 1;
+	return 0;
+}
+
+static unsigned int
+nb_common_chars(const char * s1, const char * s2)
+{
+	unsigned int i=0;
+
+	while (*s1==*s2 && *s1) {
+		s1++;
+		s2++;
+		i++;
+	}
+	return i;
+}
+
+/**
+ * try to match the buffer with an instruction (only the first
+ * nb_match_token tokens if != 0). Return 0 if we match all the
+ * tokens, else the number of matched tokens, else -1.
+ */
+static int
+match_inst(cmdline_parse_inst_t *inst, const char *buf,
+	   unsigned int nb_match_token, void *resbuf, unsigned resbuf_size)
+{
+	unsigned int token_num=0;
+	cmdline_parse_token_hdr_t * token_p;
+	unsigned int i=0;
+	int n = 0;
+	struct cmdline_token_hdr token_hdr;
+
+	token_p = inst->tokens[token_num];
+	if (token_p)
+		memcpy(&token_hdr, token_p, sizeof(token_hdr));
+
+	/* check if we match all tokens of inst */
+	while (token_p && (!nb_match_token || i<nb_match_token)) {
+		debug_printf("TK\n");
+		/* skip spaces */
+		while (isblank2(*buf)) {
+			buf++;
+		}
+
+		/* end of buf */
+		if ( isendofline(*buf) || iscomment(*buf) )
+			break;
+
+		if (resbuf == NULL) {
+			n = token_hdr.ops->parse(token_p, buf, NULL, 0);
+		} else {
+			unsigned rb_sz;
+
+			if (token_hdr.offset > resbuf_size) {
+				printf("Parse error(%s:%d): Token offset(%u) "
+					"exceeds maximum size(%u)\n",
+					__FILE__, __LINE__,
+					token_hdr.offset, resbuf_size);
+				return -ENOBUFS;
+			}
+			rb_sz = resbuf_size - token_hdr.offset;
+
+			n = token_hdr.ops->parse(token_p, buf, (char *)resbuf +
+				token_hdr.offset, rb_sz);
+		}
+
+		if (n < 0)
+			break;
+
+		debug_printf("TK parsed (len=%d)\n", n);
+		i++;
+		buf += n;
+
+		token_num ++;
+		token_p = inst->tokens[token_num];
+		if (token_p)
+			memcpy(&token_hdr, token_p, sizeof(token_hdr));
+	}
+
+	/* does not match */
+	if (i==0)
+		return -1;
+
+	/* in case we want to match a specific num of token */
+	if (nb_match_token) {
+		if (i == nb_match_token) {
+			return 0;
+		}
+		return i;
+	}
+
+	/* we don't match all the tokens */
+	if (token_p) {
+		return i;
+	}
+
+	/* are there are some tokens more */
+	while (isblank2(*buf)) {
+		buf++;
+	}
+
+	/* end of buf */
+	if ( isendofline(*buf) || iscomment(*buf) )
+		return 0;
+
+	/* garbage after inst */
+	return i;
+}
+
+
+int
+cmdline_parse(struct cmdline *cl, const char * buf)
+{
+	unsigned int inst_num=0;
+	cmdline_parse_inst_t *inst;
+	const char *curbuf;
+	char result_buf[CMDLINE_PARSE_RESULT_BUFSIZE];
+	void (*f)(void *, struct cmdline *, void *) = NULL;
+	void *data = NULL;
+	int comment = 0;
+	int linelen = 0;
+	int parse_it = 0;
+	int err = CMDLINE_PARSE_NOMATCH;
+	int tok;
+	cmdline_parse_ctx_t *ctx;
+#ifdef RTE_LIBRTE_CMDLINE_DEBUG
+	char debug_buf[BUFSIZ];
+#endif
+
+	if (!cl || !buf)
+		return CMDLINE_PARSE_BAD_ARGS;
+
+	ctx = cl->ctx;
+
+	/*
+	 * - look if the buffer contains at least one line
+	 * - look if line contains only spaces or comments
+	 * - count line length
+	 */
+	curbuf = buf;
+	while (! isendofline(*curbuf)) {
+		if ( *curbuf == '\0' ) {
+			debug_printf("Incomplete buf (len=%d)\n", linelen);
+			return 0;
+		}
+		if ( iscomment(*curbuf) ) {
+			comment = 1;
+		}
+		if ( ! isblank2(*curbuf) && ! comment) {
+			parse_it = 1;
+		}
+		curbuf++;
+		linelen++;
+	}
+
+	/* skip all endofline chars */
+	while (isendofline(buf[linelen])) {
+		linelen++;
+	}
+
+	/* empty line */
+	if ( parse_it == 0 ) {
+		debug_printf("Empty line (len=%d)\n", linelen);
+		return linelen;
+	}
+
+#ifdef RTE_LIBRTE_CMDLINE_DEBUG
+	snprintf(debug_buf, (linelen>64 ? 64 : linelen), "%s", buf);
+	debug_printf("Parse line : len=%d, <%s>\n", linelen, debug_buf);
+#endif
+
+	/* parse it !! */
+	inst = ctx[inst_num];
+	while (inst) {
+		debug_printf("INST %d\n", inst_num);
+
+		/* fully parsed */
+		tok = match_inst(inst, buf, 0, result_buf, sizeof(result_buf));
+
+		if (tok > 0) /* we matched at least one token */
+			err = CMDLINE_PARSE_BAD_ARGS;
+
+		else if (!tok) {
+			debug_printf("INST fully parsed\n");
+			/* skip spaces */
+			while (isblank2(*curbuf)) {
+				curbuf++;
+			}
+
+			/* if end of buf -> there is no garbage after inst */
+			if (isendofline(*curbuf) || iscomment(*curbuf)) {
+				if (!f) {
+					memcpy(&f, &inst->f, sizeof(f));
+					memcpy(&data, &inst->data, sizeof(data));
+				}
+				else {
+					/* more than 1 inst matches */
+					err = CMDLINE_PARSE_AMBIGUOUS;
+					f=NULL;
+					debug_printf("Ambiguous cmd\n");
+					break;
+				}
+			}
+		}
+
+		inst_num ++;
+		inst = ctx[inst_num];
+	}
+
+	/* call func */
+	if (f) {
+		f(result_buf, cl, data);
+	}
+
+	/* no match */
+	else {
+		debug_printf("No match err=%d\n", err);
+		return err;
+	}
+
+	return linelen;
+}
+
+int
+cmdline_complete(struct cmdline *cl, const char *buf, int *state,
+		 char *dst, unsigned int size)
+{
+	const char *partial_tok = buf;
+	unsigned int inst_num = 0;
+	cmdline_parse_inst_t *inst;
+	cmdline_parse_token_hdr_t *token_p;
+	struct cmdline_token_hdr token_hdr;
+	char tmpbuf[CMDLINE_BUFFER_SIZE], comp_buf[CMDLINE_BUFFER_SIZE];
+	unsigned int partial_tok_len;
+	int comp_len = -1;
+	int tmp_len = -1;
+	int nb_token = 0;
+	unsigned int i, n;
+	int l;
+	unsigned int nb_completable;
+	unsigned int nb_non_completable;
+	int local_state = 0;
+	const char *help_str;
+	cmdline_parse_ctx_t *ctx;
+
+	if (!cl || !buf || !state || !dst)
+		return -1;
+
+	ctx = cl->ctx;
+
+	debug_printf("%s called\n", __func__);
+	memset(&token_hdr, 0, sizeof(token_hdr));
+
+	/* count the number of complete token to parse */
+	for (i=0 ; buf[i] ; i++) {
+		if (!isblank2(buf[i]) && isblank2(buf[i+1]))
+			nb_token++;
+		if (isblank2(buf[i]) && !isblank2(buf[i+1]))
+			partial_tok = buf+i+1;
+	}
+	partial_tok_len = strnlen(partial_tok, RDLINE_BUF_SIZE);
+
+	/* first call -> do a first pass */
+	if (*state <= 0) {
+		debug_printf("try complete <%s>\n", buf);
+		debug_printf("there is %d complete tokens, <%s> is incomplete\n",
+			     nb_token, partial_tok);
+
+		nb_completable = 0;
+		nb_non_completable = 0;
+
+		inst = ctx[inst_num];
+		while (inst) {
+			/* parse the first tokens of the inst */
+			if (nb_token && match_inst(inst, buf, nb_token, NULL, 0))
+				goto next;
+
+			debug_printf("instruction match\n");
+			token_p = inst->tokens[nb_token];
+			if (token_p)
+				memcpy(&token_hdr, token_p, sizeof(token_hdr));
+
+			/* non completable */
+			if (!token_p ||
+			    !token_hdr.ops->complete_get_nb ||
+			    !token_hdr.ops->complete_get_elt ||
+			    (n = token_hdr.ops->complete_get_nb(token_p)) == 0) {
+				nb_non_completable++;
+				goto next;
+			}
+
+			debug_printf("%d choices for this token\n", n);
+			for (i=0 ; i<n ; i++) {
+				if (token_hdr.ops->complete_get_elt(token_p, i,
+								    tmpbuf,
+								    sizeof(tmpbuf)) < 0)
+					continue;
+
+				/* we have at least room for one char */
+				tmp_len = strnlen(tmpbuf, sizeof(tmpbuf));
+				if (tmp_len < CMDLINE_BUFFER_SIZE - 1) {
+					tmpbuf[tmp_len] = ' ';
+					tmpbuf[tmp_len+1] = 0;
+				}
+
+				debug_printf("   choice <%s>\n", tmpbuf);
+
+				/* does the completion match the
+				 * beginning of the word ? */
+				if (!strncmp(partial_tok, tmpbuf,
+					     partial_tok_len)) {
+					if (comp_len == -1) {
+						snprintf(comp_buf, sizeof(comp_buf),
+							 "%s", tmpbuf + partial_tok_len);
+						comp_len =
+							strnlen(tmpbuf + partial_tok_len,
+									sizeof(tmpbuf) - partial_tok_len);
+
+					}
+					else {
+						comp_len =
+							nb_common_chars(comp_buf,
+									tmpbuf+partial_tok_len);
+						comp_buf[comp_len] = 0;
+					}
+					nb_completable++;
+				}
+			}
+		next:
+			debug_printf("next\n");
+			inst_num ++;
+			inst = ctx[inst_num];
+		}
+
+		debug_printf("total choices %d for this completion\n",
+			     nb_completable);
+
+		/* no possible completion */
+		if (nb_completable == 0 && nb_non_completable == 0)
+			return 0;
+
+		/* if multichoice is not required */
+		if (*state == 0 && partial_tok_len > 0) {
+			/* one or several choices starting with the
+			   same chars */
+			if (comp_len > 0) {
+				if ((unsigned)(comp_len + 1) > size)
+					return 0;
+
+				snprintf(dst, size, "%s", comp_buf);
+				dst[comp_len] = 0;
+				return 2;
+			}
+		}
+	}
+
+	/* init state correctly */
+	if (*state == -1)
+		*state = 0;
+
+	debug_printf("Multiple choice STATE=%d\n", *state);
+
+	inst_num = 0;
+	inst = ctx[inst_num];
+	while (inst) {
+		/* we need to redo it */
+		inst = ctx[inst_num];
+
+		if (nb_token && match_inst(inst, buf, nb_token, NULL, 0))
+			goto next2;
+
+		token_p = inst->tokens[nb_token];
+		if (token_p)
+			memcpy(&token_hdr, token_p, sizeof(token_hdr));
+
+		/* one choice for this token */
+		if (!token_p ||
+		    !token_hdr.ops->complete_get_nb ||
+		    !token_hdr.ops->complete_get_elt ||
+		    (n = token_hdr.ops->complete_get_nb(token_p)) == 0) {
+			if (local_state < *state) {
+				local_state++;
+				goto next2;
+			}
+			(*state)++;
+			if (token_p && token_hdr.ops->get_help) {
+				token_hdr.ops->get_help(token_p, tmpbuf,
+							sizeof(tmpbuf));
+				help_str = inst->help_str;
+				if (help_str)
+					snprintf(dst, size, "[%s]: %s", tmpbuf,
+						 help_str);
+				else
+					snprintf(dst, size, "[%s]: No help",
+						 tmpbuf);
+			}
+			else {
+				snprintf(dst, size, "[RETURN]");
+			}
+			return 1;
+		}
+
+		/* several choices */
+		for (i=0 ; i<n ; i++) {
+			if (token_hdr.ops->complete_get_elt(token_p, i, tmpbuf,
+							    sizeof(tmpbuf)) < 0)
+				continue;
+			/* we have at least room for one char */
+			tmp_len = strnlen(tmpbuf, sizeof(tmpbuf));
+			if (tmp_len < CMDLINE_BUFFER_SIZE - 1) {
+				tmpbuf[tmp_len] = ' ';
+				tmpbuf[tmp_len + 1] = 0;
+			}
+
+			debug_printf("   choice <%s>\n", tmpbuf);
+
+			/* does the completion match the beginning of
+			 * the word ? */
+			if (!strncmp(partial_tok, tmpbuf,
+				     partial_tok_len)) {
+				if (local_state < *state) {
+					local_state++;
+					continue;
+				}
+				(*state)++;
+				l=snprintf(dst, size, "%s", tmpbuf);
+				if (l>=0 && token_hdr.ops->get_help) {
+					token_hdr.ops->get_help(token_p, tmpbuf,
+								sizeof(tmpbuf));
+					help_str = inst->help_str;
+					if (help_str)
+						snprintf(dst+l, size-l, "[%s]: %s",
+							 tmpbuf, help_str);
+					else
+						snprintf(dst+l, size-l,
+							 "[%s]: No help", tmpbuf);
+				}
+
+				return 1;
+			}
+		}
+	next2:
+		inst_num ++;
+		inst = ctx[inst_num];
+	}
+	return 0;
+}
diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h
new file mode 100644
index 00000000..4b25c456
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse.h
@@ -0,0 +1,191 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CMDLINE_PARSE_H_
+#define _CMDLINE_PARSE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef offsetof
+#define offsetof(type, field)  ((size_t) &( ((type *)0)->field) )
+#endif
+
+/* return status for parsing */
+#define CMDLINE_PARSE_SUCCESS        0
+#define CMDLINE_PARSE_AMBIGUOUS     -1
+#define CMDLINE_PARSE_NOMATCH       -2
+#define CMDLINE_PARSE_BAD_ARGS      -3
+
+/* return status for completion */
+#define CMDLINE_PARSE_COMPLETE_FINISHED 0
+#define CMDLINE_PARSE_COMPLETE_AGAIN    1
+#define CMDLINE_PARSE_COMPLETED_BUFFER  2
+
+/* maximum buffer size for parsed result */
+#define CMDLINE_PARSE_RESULT_BUFSIZE 8192
+
+/**
+ * Stores a pointer to the ops struct, and the offset: the place to
+ * write the parsed result in the destination structure.
+ */
+struct cmdline_token_hdr {
+	struct cmdline_token_ops *ops;
+	unsigned int offset;
+};
+typedef struct cmdline_token_hdr cmdline_parse_token_hdr_t;
+
+/**
+ * A token is defined by this structure.
+ *
+ * parse() takes the token as first argument, then the source buffer
+ * starting at the token we want to parse. The 3rd arg is a pointer
+ * where we store the parsed data (as binary). It returns the number of
+ * parsed chars on success and a negative value on error.
+ *
+ * complete_get_nb() returns the number of possible values for this
+ * token if completion is possible. If it is NULL or if it returns 0,
+ * no completion is possible.
+ *
+ * complete_get_elt() copy in dstbuf (the size is specified in the
+ * parameter) the i-th possible completion for this token.  returns 0
+ * on success or and a negative value on error.
+ *
+ * get_help() fills the dstbuf with the help for the token. It returns
+ * -1 on error and 0 on success.
+ */
+struct cmdline_token_ops {
+	/** parse(token ptr, buf, res pts, buf len) */
+	int (*parse)(cmdline_parse_token_hdr_t *, const char *, void *,
+		unsigned int);
+	/** return the num of possible choices for this token */
+	int (*complete_get_nb)(cmdline_parse_token_hdr_t *);
+	/** return the elt x for this token (token, idx, dstbuf, size) */
+	int (*complete_get_elt)(cmdline_parse_token_hdr_t *, int, char *,
+		unsigned int);
+	/** get help for this token (token, dstbuf, size) */
+	int (*get_help)(cmdline_parse_token_hdr_t *, char *, unsigned int);
+};
+
+struct cmdline;
+/**
+ * Store a instruction, which is a pointer to a callback function and
+ * its parameter that is called when the instruction is parsed, a help
+ * string, and a list of token composing this instruction.
+ */
+struct cmdline_inst {
+	/* f(parsed_struct, data) */
+	void (*f)(void *, struct cmdline *, void *);
+	void *data;
+	const char *help_str;
+	cmdline_parse_token_hdr_t *tokens[];
+};
+typedef struct cmdline_inst cmdline_parse_inst_t;
+
+/**
+ * A context is identified by its name, and contains a list of
+ * instruction
+ *
+ */
+typedef cmdline_parse_inst_t *cmdline_parse_ctx_t;
+
+/**
+ * Try to parse a buffer according to the specified context. The
+ * argument buf must ends with "\n\0". The function returns
+ * CMDLINE_PARSE_AMBIGUOUS, CMDLINE_PARSE_NOMATCH or
+ * CMDLINE_PARSE_BAD_ARGS on error. Else it calls the associated
+ * function (defined in the context) and returns 0
+ * (CMDLINE_PARSE_SUCCESS).
+ */
+int cmdline_parse(struct cmdline *cl, const char *buf);
+
+/**
+ * complete() must be called with *state==0 (try to complete) or
+ * with *state==-1 (just display choices), then called without
+ * modifying *state until it returns CMDLINE_PARSE_COMPLETED_BUFFER or
+ * CMDLINE_PARSE_COMPLETED_BUFFER.
+ *
+ * It returns < 0 on error.
+ *
+ * Else it returns:
+ *   - CMDLINE_PARSE_COMPLETED_BUFFER on completion (one possible
+ *     choice). In this case, the chars are appended in dst buffer.
+ *   - CMDLINE_PARSE_COMPLETE_AGAIN if there is several possible
+ *     choices. In this case, you must call the function again,
+ *     keeping the value of state intact.
+ *   - CMDLINE_PARSE_COMPLETED_BUFFER when the iteration is
+ *     finished. The dst is not valid for this last call.
+ *
+ * The returned dst buf ends with \0.
+ */
+int cmdline_complete(struct cmdline *cl, const char *buf, int *state,
+		     char *dst, unsigned int size);
+
+
+/* return true if(!c || iscomment(c) || isblank(c) ||
+ * isendofline(c)) */
+int cmdline_isendoftoken(char c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CMDLINE_PARSE_H_ */
diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.c b/lib/librte_cmdline/cmdline_parse_etheraddr.c
new file mode 100644
index 00000000..dbfe4a61
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_etheraddr.c
@@ -0,0 +1,180 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <net/ethernet.h>
+
+#include <rte_string_fns.h>
+
+#include "cmdline_parse.h"
+#include "cmdline_parse_etheraddr.h"
+
+struct cmdline_token_ops cmdline_token_etheraddr_ops = {
+	.parse = cmdline_parse_etheraddr,
+	.complete_get_nb = NULL,
+	.complete_get_elt = NULL,
+	.get_help = cmdline_get_help_etheraddr,
+};
+
+/* the format can be either XX:XX:XX:XX:XX:XX or XXXX:XXXX:XXXX */
+#define ETHER_ADDRSTRLENLONG 18
+#define ETHER_ADDRSTRLENSHORT 15
+
+#ifdef __linux__
+#define ea_oct ether_addr_octet
+#else
+#define ea_oct octet
+#endif
+
+
+static struct ether_addr *
+my_ether_aton(const char *a)
+{
+	int i;
+	char *end;
+	unsigned long o[ETHER_ADDR_LEN];
+	static struct ether_addr ether_addr;
+
+	i = 0;
+	do {
+		errno = 0;
+		o[i] = strtoul(a, &end, 16);
+		if (errno != 0 || end == a || (end[0] != ':' && end[0] != 0))
+			return NULL;
+		a = end + 1;
+	} while (++i != sizeof (o) / sizeof (o[0]) && end[0] != 0);
+
+	/* Junk at the end of line */
+	if (end[0] != 0)
+		return NULL;
+
+	/* Support the format XX:XX:XX:XX:XX:XX */
+	if (i == ETHER_ADDR_LEN) {
+		while (i-- != 0) {
+			if (o[i] > UINT8_MAX)
+				return NULL;
+			ether_addr.ea_oct[i] = (uint8_t)o[i];
+		}
+	/* Support the format XXXX:XXXX:XXXX */
+	} else if (i == ETHER_ADDR_LEN / 2) {
+		while (i-- != 0) {
+			if (o[i] > UINT16_MAX)
+				return NULL;
+			ether_addr.ea_oct[i * 2] = (uint8_t)(o[i] >> 8);
+			ether_addr.ea_oct[i * 2 + 1] = (uint8_t)(o[i] & 0xff);
+		}
+	/* unknown format */
+	} else
+		return NULL;
+
+	return (struct ether_addr *)&ether_addr;
+}
+
+int
+cmdline_parse_etheraddr(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
+	const char *buf, void *res, unsigned ressize)
+{
+	unsigned int token_len = 0;
+	char ether_str[ETHER_ADDRSTRLENLONG+1];
+	struct ether_addr *tmp;
+
+	if (res && ressize < sizeof(struct ether_addr))
+		return -1;
+
+	if (!buf || ! *buf)
+		return -1;
+
+	while (!cmdline_isendoftoken(buf[token_len]))
+		token_len++;
+
+	/* if token doesn't match possible string lengths... */
+	if ((token_len != ETHER_ADDRSTRLENLONG - 1) &&
+			(token_len != ETHER_ADDRSTRLENSHORT - 1))
+		return -1;
+
+	snprintf(ether_str, token_len+1, "%s", buf);
+
+	tmp = my_ether_aton(ether_str);
+	if (tmp == NULL)
+		return -1;
+	if (res)
+		memcpy(res, tmp, sizeof(struct ether_addr));
+	return token_len;
+}
+
+int
+cmdline_get_help_etheraddr(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
+			       char *dstbuf, unsigned int size)
+{
+	int ret;
+
+	ret = snprintf(dstbuf, size, "Ethernet address");
+	if (ret < 0)
+		return -1;
+	return 0;
+}
diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.h b/lib/librte_cmdline/cmdline_parse_etheraddr.h
new file mode 100644
index 00000000..e539fb66
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_etheraddr.h
@@ -0,0 +1,96 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARSE_ETHERADDR_H_
+#define _PARSE_ETHERADDR_H_
+
+#include <cmdline_parse.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cmdline_token_etheraddr {
+	struct cmdline_token_hdr hdr;
+};
+typedef struct cmdline_token_etheraddr cmdline_parse_token_etheraddr_t;
+
+extern struct cmdline_token_ops cmdline_token_etheraddr_ops;
+
+int cmdline_parse_etheraddr(cmdline_parse_token_hdr_t *tk, const char *srcbuf,
+	void *res, unsigned ressize);
+int cmdline_get_help_etheraddr(cmdline_parse_token_hdr_t *tk, char *dstbuf,
+	unsigned int size);
+
+#define TOKEN_ETHERADDR_INITIALIZER(structure, field)       \
+{                                                           \
+	/* hdr */                                               \
+	{                                                       \
+		&cmdline_token_etheraddr_ops,   /* ops */           \
+		offsetof(structure, field),     /* offset */        \
+	},                                                      \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _PARSE_ETHERADDR_H_ */
diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.c b/lib/librte_cmdline/cmdline_parse_ipaddr.c
new file mode 100644
index 00000000..d3d3e044
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_ipaddr.c
@@ -0,0 +1,408 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * For inet_ntop() functions:
+ *
+ * Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <netinet/in.h>
+#ifndef __linux__
+#ifndef __FreeBSD__
+#include <net/socket.h>
+#else
+#include <sys/socket.h>
+#endif
+#endif
+
+#include <rte_string_fns.h>
+
+#include "cmdline_parse.h"
+#include "cmdline_parse_ipaddr.h"
+
+struct cmdline_token_ops cmdline_token_ipaddr_ops = {
+	.parse = cmdline_parse_ipaddr,
+	.complete_get_nb = NULL,
+	.complete_get_elt = NULL,
+	.get_help = cmdline_get_help_ipaddr,
+};
+
+#define INADDRSZ 4
+#define IN6ADDRSZ 16
+#define PREFIXMAX 128
+#define V4PREFIXMAX 32
+
+/*
+ * WARNING: Don't even consider trying to compile this on a system where
+ * sizeof(int) < 4.  sizeof(int) > 4 is fine; all the world's not a VAX.
+ */
+
+static int inet_pton4(const char *src, unsigned char *dst);
+static int inet_pton6(const char *src, unsigned char *dst);
+
+/* int
+ * inet_pton(af, src, dst)
+ *      convert from presentation format (which usually means ASCII printable)
+ *      to network format (which is usually some kind of binary format).
+ * return:
+ *      1 if the address was valid for the specified address family
+ *      0 if the address wasn't valid (`dst' is untouched in this case)
+ *      -1 if some other error occurred (`dst' is untouched in this case, too)
+ * author:
+ *      Paul Vixie, 1996.
+ */
+static int
+my_inet_pton(int af, const char *src, void *dst)
+{
+	switch (af) {
+		case AF_INET:
+			return inet_pton4(src, dst);
+		case AF_INET6:
+			return inet_pton6(src, dst);
+		default:
+			errno = EAFNOSUPPORT;
+			return -1;
+	}
+	/* NOTREACHED */
+}
+
+/* int
+ * inet_pton4(src, dst)
+ *      like inet_aton() but without all the hexadecimal and shorthand.
+ * return:
+ *      1 if `src' is a valid dotted quad, else 0.
+ * notice:
+ *      does not touch `dst' unless it's returning 1.
+ * author:
+ *      Paul Vixie, 1996.
+ */
+static int
+inet_pton4(const char *src, unsigned char *dst)
+{
+	static const char digits[] = "0123456789";
+	int saw_digit, octets, ch;
+	unsigned char tmp[INADDRSZ], *tp;
+
+	saw_digit = 0;
+	octets = 0;
+	*(tp = tmp) = 0;
+	while ((ch = *src++) != '\0') {
+		const char *pch;
+
+		if ((pch = strchr(digits, ch)) != NULL) {
+			unsigned int new = *tp * 10 + (pch - digits);
+
+			if (new > 255)
+				return 0;
+			if (! saw_digit) {
+				if (++octets > 4)
+					return 0;
+				saw_digit = 1;
+			}
+			*tp = (unsigned char)new;
+		} else if (ch == '.' && saw_digit) {
+			if (octets == 4)
+				return 0;
+			*++tp = 0;
+			saw_digit = 0;
+		} else
+			return 0;
+	}
+	if (octets < 4)
+		return 0;
+
+	memcpy(dst, tmp, INADDRSZ);
+	return 1;
+}
+
+/* int
+ * inet_pton6(src, dst)
+ *      convert presentation level address to network order binary form.
+ * return:
+ *      1 if `src' is a valid [RFC1884 2.2] address, else 0.
+ * notice:
+ *      (1) does not touch `dst' unless it's returning 1.
+ *      (2) :: in a full address is silently ignored.
+ * credit:
+ *      inspired by Mark Andrews.
+ * author:
+ *      Paul Vixie, 1996.
+ */
+static int
+inet_pton6(const char *src, unsigned char *dst)
+{
+	static const char xdigits_l[] = "0123456789abcdef",
+		xdigits_u[] = "0123456789ABCDEF";
+	unsigned char tmp[IN6ADDRSZ], *tp = 0, *endp = 0, *colonp = 0;
+	const char *xdigits = 0, *curtok = 0;
+	int ch = 0, saw_xdigit = 0, count_xdigit = 0;
+	unsigned int val = 0;
+	unsigned dbloct_count = 0;
+
+	memset((tp = tmp), '\0', IN6ADDRSZ);
+	endp = tp + IN6ADDRSZ;
+	colonp = NULL;
+	/* Leading :: requires some special handling. */
+	if (*src == ':')
+		if (*++src != ':')
+			return 0;
+	curtok = src;
+	saw_xdigit = count_xdigit = 0;
+	val = 0;
+
+	while ((ch = *src++) != '\0') {
+		const char *pch;
+
+		if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+			pch = strchr((xdigits = xdigits_u), ch);
+		if (pch != NULL) {
+			if (count_xdigit >= 4)
+				return 0;
+			val <<= 4;
+			val |= (pch - xdigits);
+			if (val > 0xffff)
+				return 0;
+			saw_xdigit = 1;
+			count_xdigit++;
+			continue;
+		}
+		if (ch == ':') {
+			curtok = src;
+			if (!saw_xdigit) {
+				if (colonp)
+					return 0;
+				colonp = tp;
+				continue;
+			} else if (*src == '\0') {
+				return 0;
+			}
+			if (tp + sizeof(int16_t) > endp)
+				return 0;
+			*tp++ = (unsigned char) ((val >> 8) & 0xff);
+			*tp++ = (unsigned char) (val & 0xff);
+			saw_xdigit = 0;
+			count_xdigit = 0;
+			val = 0;
+			dbloct_count++;
+			continue;
+		}
+		if (ch == '.' && ((tp + INADDRSZ) <= endp) &&
+		    inet_pton4(curtok, tp) > 0) {
+			tp += INADDRSZ;
+			saw_xdigit = 0;
+			dbloct_count += 2;
+			break;  /* '\0' was seen by inet_pton4(). */
+		}
+		return 0;
+	}
+	if (saw_xdigit) {
+		if (tp + sizeof(int16_t) > endp)
+			return 0;
+		*tp++ = (unsigned char) ((val >> 8) & 0xff);
+		*tp++ = (unsigned char) (val & 0xff);
+		dbloct_count++;
+	}
+	if (colonp != NULL) {
+		/* if we already have 8 double octets, having a colon means error */
+		if (dbloct_count == 8)
+			return 0;
+
+		/*
+		 * Since some memmove()'s erroneously fail to handle
+		 * overlapping regions, we'll do the shift by hand.
+		 */
+		const int n = tp - colonp;
+		int i;
+
+		for (i = 1; i <= n; i++) {
+			endp[- i] = colonp[n - i];
+			colonp[n - i] = 0;
+		}
+		tp = endp;
+	}
+	if (tp != endp)
+		return 0;
+	memcpy(dst, tmp, IN6ADDRSZ);
+	return 1;
+}
+
+int
+cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
+	unsigned ressize)
+{
+	struct cmdline_token_ipaddr *tk2;
+	unsigned int token_len = 0;
+	char ip_str[INET6_ADDRSTRLEN+4+1]; /* '+4' is for prefixlen (if any) */
+	cmdline_ipaddr_t ipaddr;
+	char *prefix, *prefix_end;
+	long prefixlen = 0;
+
+	if (res && ressize < sizeof(cmdline_ipaddr_t))
+		return -1;
+
+	if (!buf || !tk || ! *buf)
+		return -1;
+
+	tk2 = (struct cmdline_token_ipaddr *)tk;
+
+	while (!cmdline_isendoftoken(buf[token_len]))
+		token_len++;
+
+	/* if token is too big... */
+	if (token_len >= INET6_ADDRSTRLEN+4)
+		return -1;
+
+	snprintf(ip_str, token_len+1, "%s", buf);
+
+	/* convert the network prefix */
+	if (tk2->ipaddr_data.flags & CMDLINE_IPADDR_NETWORK) {
+		prefix = strrchr(ip_str, '/');
+		if (prefix == NULL)
+			return -1;
+		*prefix = '\0';
+		prefix ++;
+		errno = 0;
+		prefixlen = strtol(prefix, &prefix_end, 10);
+		if (errno || (*prefix_end != '\0')
+			|| prefixlen < 0 || prefixlen > PREFIXMAX)
+			return -1;
+		ipaddr.prefixlen = prefixlen;
+	}
+	else {
+		ipaddr.prefixlen = 0;
+	}
+
+	/* convert the IP addr */
+	if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V4) &&
+	    my_inet_pton(AF_INET, ip_str, &ipaddr.addr.ipv4) == 1 &&
+		prefixlen <= V4PREFIXMAX) {
+		ipaddr.family = AF_INET;
+		if (res)
+			memcpy(res, &ipaddr, sizeof(ipaddr));
+		return token_len;
+	}
+	if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V6) &&
+	    my_inet_pton(AF_INET6, ip_str, &ipaddr.addr.ipv6) == 1) {
+		ipaddr.family = AF_INET6;
+		if (res)
+			memcpy(res, &ipaddr, sizeof(ipaddr));
+		return token_len;
+	}
+	return -1;
+
+}
+
+int cmdline_get_help_ipaddr(cmdline_parse_token_hdr_t *tk, char *dstbuf,
+			    unsigned int size)
+{
+	struct cmdline_token_ipaddr *tk2;
+
+	if (!tk || !dstbuf)
+		return -1;
+
+	tk2 = (struct cmdline_token_ipaddr *)tk;
+
+	switch (tk2->ipaddr_data.flags) {
+	case CMDLINE_IPADDR_V4:
+		snprintf(dstbuf, size, "IPv4");
+		break;
+	case CMDLINE_IPADDR_V6:
+		snprintf(dstbuf, size, "IPv6");
+		break;
+	case CMDLINE_IPADDR_V4|CMDLINE_IPADDR_V6:
+		snprintf(dstbuf, size, "IPv4/IPv6");
+		break;
+	case CMDLINE_IPADDR_NETWORK|CMDLINE_IPADDR_V4:
+		snprintf(dstbuf, size, "IPv4 network");
+		break;
+	case CMDLINE_IPADDR_NETWORK|CMDLINE_IPADDR_V6:
+		snprintf(dstbuf, size, "IPv6 network");
+		break;
+	case CMDLINE_IPADDR_NETWORK|CMDLINE_IPADDR_V4|CMDLINE_IPADDR_V6:
+		snprintf(dstbuf, size, "IPv4/IPv6 network");
+		break;
+	default:
+		snprintf(dstbuf, size, "IPaddr (bad flags)");
+		break;
+	}
+	return 0;
+}
diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.h b/lib/librte_cmdline/cmdline_parse_ipaddr.h
new file mode 100644
index 00000000..2b4266fd
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_ipaddr.h
@@ -0,0 +1,189 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARSE_IPADDR_H_
+#define _PARSE_IPADDR_H_
+
+#include <cmdline_parse.h>
+#include <netinet/in.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CMDLINE_IPADDR_V4      0x01
+#define CMDLINE_IPADDR_V6      0x02
+#define CMDLINE_IPADDR_NETWORK 0x04
+
+struct cmdline_ipaddr {
+	uint8_t family;
+	union {
+		struct in_addr ipv4;
+		struct in6_addr ipv6;
+	} addr;
+	unsigned int prefixlen; /* in case of network only */
+};
+typedef struct cmdline_ipaddr cmdline_ipaddr_t;
+
+struct cmdline_token_ipaddr_data {
+	uint8_t flags;
+};
+
+struct cmdline_token_ipaddr {
+	struct cmdline_token_hdr hdr;
+	struct cmdline_token_ipaddr_data ipaddr_data;
+};
+typedef struct cmdline_token_ipaddr cmdline_parse_token_ipaddr_t;
+
+extern struct cmdline_token_ops cmdline_token_ipaddr_ops;
+
+int cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *srcbuf,
+	void *res, unsigned ressize);
+int cmdline_get_help_ipaddr(cmdline_parse_token_hdr_t *tk, char *dstbuf,
+	unsigned int size);
+
+#define TOKEN_IPADDR_INITIALIZER(structure, field)      \
+{                                                       \
+	/* hdr */                                           \
+	{                                                   \
+		&cmdline_token_ipaddr_ops,      /* ops */       \
+		offsetof(structure, field),     /* offset */    \
+	},                                                  \
+	/* ipaddr_data */                                   \
+	{                                                   \
+		CMDLINE_IPADDR_V4 |             /* flags */     \
+		CMDLINE_IPADDR_V6,                              \
+	},                                                  \
+}
+
+#define TOKEN_IPV4_INITIALIZER(structure, field)        \
+{                                                       \
+	/* hdr */                                           \
+	{                                                   \
+		&cmdline_token_ipaddr_ops,      /* ops */       \
+		offsetof(structure, field),     /* offset */    \
+	},                                                  \
+	/* ipaddr_data */                                   \
+	{                                                   \
+		CMDLINE_IPADDR_V4,              /* flags */     \
+	},                                                  \
+}
+
+#define TOKEN_IPV6_INITIALIZER(structure, field)        \
+{                                                       \
+	/* hdr */                                           \
+	{                                                   \
+		&cmdline_token_ipaddr_ops,      /* ops */       \
+		offsetof(structure, field),     /* offset */    \
+	},                                                  \
+	/* ipaddr_data */                                   \
+	{                                                   \
+		CMDLINE_IPADDR_V6,              /* flags */     \
+	},                                                  \
+}
+
+#define TOKEN_IPNET_INITIALIZER(structure, field)       \
+{                                                       \
+	/* hdr */                                           \
+	{                                                   \
+		&cmdline_token_ipaddr_ops,      /* ops */       \
+		offsetof(structure, field),     /* offset */    \
+	},                                                  \
+	/* ipaddr_data */                                   \
+	{                                                   \
+		CMDLINE_IPADDR_V4 |             /* flags */     \
+		CMDLINE_IPADDR_V6 |                             \
+		CMDLINE_IPADDR_NETWORK,                         \
+	},                                                  \
+}
+
+#define TOKEN_IPV4NET_INITIALIZER(structure, field)     \
+{                                                       \
+	/* hdr */                                           \
+	{                                                   \
+		&cmdline_token_ipaddr_ops,      /* ops */       \
+		offsetof(structure, field),     /* offset */    \
+	},                                                  \
+	/* ipaddr_data */                                   \
+	{                                                   \
+		CMDLINE_IPADDR_V4 |             /* flags */     \
+		CMDLINE_IPADDR_NETWORK,                         \
+	},                                                  \
+}
+
+#define TOKEN_IPV6NET_INITIALIZER(structure, field)     \
+{                                                       \
+	/* hdr */                                           \
+	{                                                   \
+		&cmdline_token_ipaddr_ops,      /* ops */       \
+		offsetof(structure, field),     /* offset */    \
+	},                                                  \
+	/* ipaddr_data */                                   \
+	{                                                   \
+		CMDLINE_IPADDR_V4 |             /* flags */     \
+		CMDLINE_IPADDR_NETWORK,                         \
+	},                                                  \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PARSE_IPADDR_H_ */
diff --git a/lib/librte_cmdline/cmdline_parse_num.c b/lib/librte_cmdline/cmdline_parse_num.c
new file mode 100644
index 00000000..b0f9a35d
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_num.c
@@ -0,0 +1,402 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <rte_string_fns.h>
+
+#include "cmdline_parse.h"
+#include "cmdline_parse_num.h"
+
+#ifdef RTE_LIBRTE_CMDLINE_DEBUG
+#define debug_printf(args...) printf(args)
+#else
+#define debug_printf(args...) do {} while(0)
+#endif
+
+struct cmdline_token_ops cmdline_token_num_ops = {
+	.parse = cmdline_parse_num,
+	.complete_get_nb = NULL,
+	.complete_get_elt = NULL,
+	.get_help = cmdline_get_help_num,
+};
+
+
+enum num_parse_state_t {
+	START,
+	DEC_NEG,
+	BIN,
+	HEX,
+
+	ERROR,
+
+	FIRST_OK, /* not used */
+	ZERO_OK,
+	HEX_OK,
+	OCTAL_OK,
+	BIN_OK,
+	DEC_NEG_OK,
+	DEC_POS_OK,
+};
+
+/* Keep it sync with enum in .h */
+static const char * num_help[] = {
+	"UINT8", "UINT16", "UINT32", "UINT64",
+	"INT8", "INT16", "INT32", "INT64",
+};
+
+static inline int
+add_to_res(unsigned int c, uint64_t *res, unsigned int base)
+{
+	/* overflow */
+	if ( (UINT64_MAX - c) / base < *res ) {
+		return -1;
+	}
+
+	*res = (uint64_t) (*res * base + c);
+	return 0;
+}
+
+static int
+check_res_size(struct cmdline_token_num_data *nd, unsigned ressize)
+{
+	switch (nd->type) {
+	case INT8:
+	case UINT8:
+		if (ressize < sizeof(int8_t))
+			return -1;
+		break;
+	case INT16:
+	case UINT16:
+		if (ressize < sizeof(int16_t))
+			return -1;
+		break;
+	case INT32:
+	case UINT32:
+		if (ressize < sizeof(int32_t))
+			return -1;
+		break;
+	case INT64:
+	case UINT64:
+		if (ressize < sizeof(int64_t))
+			return -1;
+		break;
+	default:
+		return -1;
+	}
+	return 0;
+}
+
+/* parse an int */
+int
+cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
+	unsigned ressize)
+{
+	struct cmdline_token_num_data nd;
+	enum num_parse_state_t st = START;
+	const char * buf;
+	char c;
+	uint64_t res1 = 0;
+
+	if (!tk)
+		return -1;
+
+	if (!srcbuf || !*srcbuf)
+		return -1;
+
+	buf = srcbuf;
+	c = *buf;
+
+	memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd));
+
+	/* check that we have enough room in res */
+	if (res) {
+		if (check_res_size(&nd, ressize) < 0)
+			return -1;
+	}
+
+	while ( st != ERROR && c && ! cmdline_isendoftoken(c) ) {
+		debug_printf("%c %x -> ", c, c);
+		switch (st) {
+		case START:
+			if (c == '-') {
+				st = DEC_NEG;
+			}
+			else if (c == '0') {
+				st = ZERO_OK;
+			}
+			else if (c >= '1' && c <= '9') {
+				if (add_to_res(c - '0', &res1, 10) < 0)
+					st = ERROR;
+				else
+					st = DEC_POS_OK;
+			}
+			else  {
+				st = ERROR;
+			}
+			break;
+
+		case ZERO_OK:
+			if (c == 'x') {
+				st = HEX;
+			}
+			else if (c == 'b') {
+				st = BIN;
+			}
+			else if (c >= '0' && c <= '7') {
+				if (add_to_res(c - '0', &res1, 10) < 0)
+					st = ERROR;
+				else
+					st = OCTAL_OK;
+			}
+			else  {
+				st = ERROR;
+			}
+			break;
+
+		case DEC_NEG:
+			if (c >= '0' && c <= '9') {
+				if (add_to_res(c - '0', &res1, 10) < 0)
+					st = ERROR;
+				else
+					st = DEC_NEG_OK;
+			}
+			else {
+				st = ERROR;
+			}
+			break;
+
+		case DEC_NEG_OK:
+			if (c >= '0' && c <= '9') {
+				if (add_to_res(c - '0', &res1, 10) < 0)
+					st = ERROR;
+			}
+			else {
+				st = ERROR;
+			}
+			break;
+
+		case DEC_POS_OK:
+			if (c >= '0' && c <= '9') {
+				if (add_to_res(c - '0', &res1, 10) < 0)
+					st = ERROR;
+			}
+			else {
+				st = ERROR;
+			}
+			break;
+
+		case HEX:
+			st = HEX_OK;
+			/* no break */
+		case HEX_OK:
+			if (c >= '0' && c <= '9') {
+				if (add_to_res(c - '0', &res1, 16) < 0)
+					st = ERROR;
+			}
+			else if (c >= 'a' && c <= 'f') {
+				if (add_to_res(c - 'a' + 10, &res1, 16) < 0)
+					st = ERROR;
+			}
+			else if (c >= 'A' && c <= 'F') {
+				if (add_to_res(c - 'A' + 10, &res1, 16) < 0)
+					st = ERROR;
+			}
+			else {
+				st = ERROR;
+			}
+			break;
+
+
+		case OCTAL_OK:
+			if (c >= '0' && c <= '7') {
+				if (add_to_res(c - '0', &res1, 8) < 0)
+					st = ERROR;
+			}
+			else {
+				st = ERROR;
+			}
+			break;
+
+		case BIN:
+			st = BIN_OK;
+			/* no break */
+		case BIN_OK:
+			if (c >= '0' && c <= '1') {
+				if (add_to_res(c - '0', &res1, 2) < 0)
+					st = ERROR;
+			}
+			else {
+				st = ERROR;
+			}
+			break;
+		default:
+			debug_printf("not impl ");
+
+		}
+
+		debug_printf("(%"PRIu64")\n", res1);
+
+		buf ++;
+		c = *buf;
+
+		/* token too long */
+		if (buf-srcbuf > 127)
+			return -1;
+	}
+
+	switch (st) {
+	case ZERO_OK:
+	case DEC_POS_OK:
+	case HEX_OK:
+	case OCTAL_OK:
+	case BIN_OK:
+		if ( nd.type == INT8 && res1 <= INT8_MAX ) {
+			if (res) *(int8_t *)res = (int8_t) res1;
+			return buf-srcbuf;
+		}
+		else if ( nd.type == INT16 && res1 <= INT16_MAX ) {
+			if (res) *(int16_t *)res = (int16_t) res1;
+			return buf-srcbuf;
+		}
+		else if ( nd.type == INT32 && res1 <= INT32_MAX ) {
+			if (res) *(int32_t *)res = (int32_t) res1;
+			return buf-srcbuf;
+		}
+		else if ( nd.type == INT64 && res1 <= INT64_MAX ) {
+			if (res) *(int64_t *)res = (int64_t) res1;
+			return buf-srcbuf;
+		}
+		else if ( nd.type == UINT8 && res1 <= UINT8_MAX ) {
+			if (res) *(uint8_t *)res = (uint8_t) res1;
+			return buf-srcbuf;
+		}
+		else if (nd.type == UINT16  && res1 <= UINT16_MAX ) {
+			if (res) *(uint16_t *)res = (uint16_t) res1;
+			return buf-srcbuf;
+		}
+		else if ( nd.type == UINT32 && res1 <= UINT32_MAX ) {
+			if (res) *(uint32_t *)res = (uint32_t) res1;
+			return buf-srcbuf;
+		}
+		else if ( nd.type == UINT64 ) {
+			if (res) *(uint64_t *)res = res1;
+			return buf-srcbuf;
+		}
+		else {
+			return -1;
+		}
+		break;
+
+	case DEC_NEG_OK:
+		if ( nd.type == INT8 && res1 <= INT8_MAX + 1 ) {
+			if (res) *(int8_t *)res = (int8_t) (-res1);
+			return buf-srcbuf;
+		}
+		else if ( nd.type == INT16 && res1 <= (uint16_t)INT16_MAX + 1 ) {
+			if (res) *(int16_t *)res = (int16_t) (-res1);
+			return buf-srcbuf;
+		}
+		else if ( nd.type == INT32 && res1 <= (uint32_t)INT32_MAX + 1 ) {
+			if (res) *(int32_t *)res = (int32_t) (-res1);
+			return buf-srcbuf;
+		}
+		else if ( nd.type == INT64 && res1 <= (uint64_t)INT64_MAX + 1 ) {
+			if (res) *(int64_t *)res = (int64_t) (-res1);
+			return buf-srcbuf;
+		}
+		else {
+			return -1;
+		}
+		break;
+	default:
+		debug_printf("error\n");
+		return -1;
+	}
+}
+
+
+/* parse an int */
+int
+cmdline_get_help_num(cmdline_parse_token_hdr_t *tk, char *dstbuf, unsigned int size)
+{
+	struct cmdline_token_num_data nd;
+	int ret;
+
+	if (!tk)
+		return -1;
+
+	memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd));
+
+	/* should not happen.... don't so this test */
+	/* if (nd.type >= (sizeof(num_help)/sizeof(const char *))) */
+	/* return -1; */
+
+	ret = snprintf(dstbuf, size, "%s", num_help[nd.type]);
+	if (ret < 0)
+		return -1;
+	dstbuf[size-1] = '\0';
+	return 0;
+}
diff --git a/lib/librte_cmdline/cmdline_parse_num.h b/lib/librte_cmdline/cmdline_parse_num.h
new file mode 100644
index 00000000..2558cbf0
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_num.h
@@ -0,0 +1,115 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARSE_NUM_H_
+#define _PARSE_NUM_H_
+
+#include <cmdline_parse.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum cmdline_numtype {
+	UINT8 = 0,
+	UINT16,
+	UINT32,
+	UINT64,
+	INT8,
+	INT16,
+	INT32,
+	INT64
+};
+
+struct cmdline_token_num_data {
+	enum cmdline_numtype type;
+};
+
+struct cmdline_token_num {
+	struct cmdline_token_hdr hdr;
+	struct cmdline_token_num_data num_data;
+};
+typedef struct cmdline_token_num cmdline_parse_token_num_t;
+
+extern struct cmdline_token_ops cmdline_token_num_ops;
+
+int cmdline_parse_num(cmdline_parse_token_hdr_t *tk,
+	const char *srcbuf, void *res, unsigned ressize);
+int cmdline_get_help_num(cmdline_parse_token_hdr_t *tk,
+	char *dstbuf, unsigned int size);
+
+#define TOKEN_NUM_INITIALIZER(structure, field, numtype)    \
+{                                                           \
+	/* hdr */                                               \
+	{                                                       \
+		&cmdline_token_num_ops,         /* ops */           \
+		offsetof(structure, field),     /* offset */        \
+	},                                                      \
+	/* num_data */                                          \
+	{                                                       \
+		numtype,                        /* type */          \
+	},                                                      \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PARSE_NUM_H_ */
diff --git a/lib/librte_cmdline/cmdline_parse_portlist.c b/lib/librte_cmdline/cmdline_parse_portlist.c
new file mode 100644
index 00000000..f11bdf03
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_portlist.c
@@ -0,0 +1,173 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2010, Keith Wiles <keith.wiles@windriver.com>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <stdarg.h>
+
+#include <rte_string_fns.h>
+#include "cmdline_parse.h"
+#include "cmdline_parse_portlist.h"
+
+struct cmdline_token_ops cmdline_token_portlist_ops = {
+	.parse = cmdline_parse_portlist,
+	.complete_get_nb = NULL,
+	.complete_get_elt = NULL,
+	.get_help = cmdline_get_help_portlist,
+};
+
+static void
+parse_set_list(cmdline_portlist_t *pl, size_t low, size_t high)
+{
+	do {
+		pl->map |= (1 << low++);
+	} while (low <= high);
+}
+
+static int
+parse_ports(cmdline_portlist_t *pl, const char *str)
+{
+	size_t ps, pe;
+	const char *first, *last;
+	char *end;
+
+	for (first = str, last = first;
+	    first != NULL && last != NULL;
+	    first = last + 1) {
+
+		last = strchr(first, ',');
+
+		errno = 0;
+		ps = strtoul(first, &end, 10);
+		if (errno != 0 || end == first ||
+		    (end[0] != '-' && end[0] != 0 && end != last))
+			return -1;
+
+		/* Support for N-M portlist format */
+		if (end[0] == '-') {
+			errno = 0;
+			first = end + 1;
+			pe = strtoul(first, &end, 10);
+			if (errno != 0 || end == first ||
+			    (end[0] != 0 && end != last))
+				return -1;
+		} else {
+			pe = ps;
+		}
+
+		if (ps > pe || pe >= sizeof (pl->map) * 8)
+			return -1;
+
+		parse_set_list(pl, ps, pe);
+	}
+
+	return 0;
+}
+
+int
+cmdline_parse_portlist(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
+	const char *buf, void *res, unsigned ressize)
+{
+	unsigned int token_len = 0;
+	char portlist_str[PORTLIST_TOKEN_SIZE+1];
+	cmdline_portlist_t *pl;
+
+	if (!buf || ! *buf)
+		return -1;
+
+	if (res && ressize < sizeof(cmdline_portlist_t))
+		return -1;
+
+	pl = res;
+
+	while (!cmdline_isendoftoken(buf[token_len]) &&
+	    (token_len < PORTLIST_TOKEN_SIZE))
+		token_len++;
+
+	if (token_len >= PORTLIST_TOKEN_SIZE)
+		return -1;
+
+	snprintf(portlist_str, token_len+1, "%s", buf);
+
+	if (pl) {
+		pl->map = 0;
+		if (strcmp("all", portlist_str) == 0)
+			pl->map	= UINT32_MAX;
+		else if (parse_ports(pl, portlist_str) != 0)
+			return -1;
+	}
+
+	return token_len;
+}
+
+int
+cmdline_get_help_portlist(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
+		char *dstbuf, unsigned int size)
+{
+	int ret;
+	ret = snprintf(dstbuf, size, "range of ports as 3,4-6,8-19,20");
+	if (ret < 0)
+		return -1;
+	return 0;
+}
diff --git a/lib/librte_cmdline/cmdline_parse_portlist.h b/lib/librte_cmdline/cmdline_parse_portlist.h
new file mode 100644
index 00000000..73d70e05
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_portlist.h
@@ -0,0 +1,103 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2010, Keith Wiles <keith.wiles@windriver.com>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARSE_PORTLIST_H_
+#define _PARSE_PORTLIST_H_
+
+#include <cmdline_parse.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* size of a parsed string */
+#define PORTLIST_TOKEN_SIZE	128
+#define PORTLIST_MAX_TOKENS	32
+
+typedef struct cmdline_portlist {
+	uint32_t		map;
+} cmdline_portlist_t;
+
+struct cmdline_token_portlist {
+	struct cmdline_token_hdr hdr;
+};
+typedef struct cmdline_token_portlist cmdline_parse_token_portlist_t;
+
+extern struct cmdline_token_ops cmdline_token_portlist_ops;
+
+int cmdline_parse_portlist(cmdline_parse_token_hdr_t *tk,
+	const char *srcbuf, void *res, unsigned ressize);
+int cmdline_get_help_portlist(cmdline_parse_token_hdr_t *tk,
+	char *dstbuf, unsigned int size);
+
+#define TOKEN_PORTLIST_INITIALIZER(structure, field)        \
+{                                                           \
+	/* hdr */                                               \
+	{                                                       \
+		&cmdline_token_portlist_ops,    /* ops */           \
+		offsetof(structure, field),     /* offset */        \
+	},                                                      \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PARSE_PORTLIST_H_ */
diff --git a/lib/librte_cmdline/cmdline_parse_string.c b/lib/librte_cmdline/cmdline_parse_string.c
new file mode 100644
index 00000000..45883b3e
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_string.c
@@ -0,0 +1,253 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <rte_string_fns.h>
+
+#include "cmdline_parse.h"
+#include "cmdline_parse_string.h"
+
+struct cmdline_token_ops cmdline_token_string_ops = {
+	.parse = cmdline_parse_string,
+	.complete_get_nb = cmdline_complete_get_nb_string,
+	.complete_get_elt = cmdline_complete_get_elt_string,
+	.get_help = cmdline_get_help_string,
+};
+
+#define MULTISTRING_HELP "Mul-choice STRING"
+#define ANYSTRING_HELP   "Any STRING"
+#define FIXEDSTRING_HELP "Fixed STRING"
+
+static unsigned int
+get_token_len(const char *s)
+{
+	char c;
+	unsigned int i=0;
+
+	c = s[i];
+	while (c!='#' && c!='\0') {
+		i++;
+		c = s[i];
+	}
+	return i;
+}
+
+static const char *
+get_next_token(const char *s)
+{
+	unsigned int i;
+	i = get_token_len(s);
+	if (s[i] == '#')
+		return s+i+1;
+	return NULL;
+}
+
+int
+cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
+	unsigned ressize)
+{
+	struct cmdline_token_string *tk2;
+	struct cmdline_token_string_data *sd;
+	unsigned int token_len;
+	const char *str;
+
+	if (res && ressize < STR_TOKEN_SIZE)
+		return -1;
+
+	if (!tk || !buf || ! *buf)
+		return -1;
+
+	tk2 = (struct cmdline_token_string *)tk;
+
+	sd = &tk2->string_data;
+
+	/* fixed string */
+	if (sd->str) {
+		str = sd->str;
+		do {
+			token_len = get_token_len(str);
+
+			/* if token is too big... */
+			if (token_len >= STR_TOKEN_SIZE - 1) {
+				continue;
+			}
+
+			if ( strncmp(buf, str, token_len) ) {
+				continue;
+			}
+
+			if ( !cmdline_isendoftoken(*(buf+token_len)) ) {
+				continue;
+			}
+
+			break;
+		} while ( (str = get_next_token(str)) != NULL );
+
+		if (!str)
+			return -1;
+	}
+	/* unspecified string */
+	else {
+		token_len = 0;
+		while(!cmdline_isendoftoken(buf[token_len]) &&
+		      token_len < (STR_TOKEN_SIZE-1))
+			token_len++;
+
+		/* return if token too long */
+		if (token_len >= STR_TOKEN_SIZE - 1) {
+			return -1;
+		}
+	}
+
+	if (res) {
+		/* we are sure that token_len is < STR_TOKEN_SIZE-1 */
+		snprintf(res, STR_TOKEN_SIZE, "%s", buf);
+		*((char *)res + token_len) = 0;
+	}
+
+
+	return token_len;
+}
+
+int cmdline_complete_get_nb_string(cmdline_parse_token_hdr_t *tk)
+{
+	struct cmdline_token_string *tk2;
+	struct cmdline_token_string_data *sd;
+	const char *str;
+	int ret = 1;
+
+	if (!tk)
+		return -1;
+
+	tk2 = (struct cmdline_token_string *)tk;
+	sd = &tk2->string_data;
+
+	if (!sd->str)
+		return 0;
+
+	str = sd->str;
+	while( (str = get_next_token(str)) != NULL ) {
+		ret++;
+	}
+	return ret;
+}
+
+int cmdline_complete_get_elt_string(cmdline_parse_token_hdr_t *tk, int idx,
+				    char *dstbuf, unsigned int size)
+{
+	struct cmdline_token_string *tk2;
+	struct cmdline_token_string_data *sd;
+	const char *s;
+	unsigned int len;
+
+	if (!tk || !dstbuf || idx < 0)
+		return -1;
+
+	tk2 = (struct cmdline_token_string *)tk;
+	sd = &tk2->string_data;
+
+	s = sd->str;
+
+	while (idx-- && s)
+		s = get_next_token(s);
+
+	if (!s)
+		return -1;
+
+	len = get_token_len(s);
+	if (len > size - 1)
+		return -1;
+
+	memcpy(dstbuf, s, len);
+	dstbuf[len] = '\0';
+	return 0;
+}
+
+
+int cmdline_get_help_string(cmdline_parse_token_hdr_t *tk, char *dstbuf,
+			    unsigned int size)
+{
+	struct cmdline_token_string *tk2;
+	struct cmdline_token_string_data *sd;
+	const char *s;
+
+	if (!tk || !dstbuf)
+		return -1;
+
+	tk2 = (struct cmdline_token_string *)tk;
+	sd = &tk2->string_data;
+
+	s = sd->str;
+
+	if (s) {
+		if (get_next_token(s))
+			snprintf(dstbuf, size, MULTISTRING_HELP);
+		else
+			snprintf(dstbuf, size, FIXEDSTRING_HELP);
+	} else
+		snprintf(dstbuf, size, ANYSTRING_HELP);
+
+	return 0;
+}
diff --git a/lib/librte_cmdline/cmdline_parse_string.h b/lib/librte_cmdline/cmdline_parse_string.h
new file mode 100644
index 00000000..94aa1f1b
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_parse_string.h
@@ -0,0 +1,112 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARSE_STRING_H_
+#define _PARSE_STRING_H_
+
+#include <cmdline_parse.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* size of a parsed string */
+#define STR_TOKEN_SIZE 128
+
+typedef char cmdline_fixed_string_t[STR_TOKEN_SIZE];
+
+struct cmdline_token_string_data {
+	const char *str;
+};
+
+struct cmdline_token_string {
+	struct cmdline_token_hdr hdr;
+	struct cmdline_token_string_data string_data;
+};
+typedef struct cmdline_token_string cmdline_parse_token_string_t;
+
+extern struct cmdline_token_ops cmdline_token_string_ops;
+
+int cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *srcbuf,
+	void *res, unsigned ressize);
+int cmdline_complete_get_nb_string(cmdline_parse_token_hdr_t *tk);
+int cmdline_complete_get_elt_string(cmdline_parse_token_hdr_t *tk, int idx,
+				    char *dstbuf, unsigned int size);
+int cmdline_get_help_string(cmdline_parse_token_hdr_t *tk, char *dstbuf,
+			    unsigned int size);
+
+#define TOKEN_STRING_INITIALIZER(structure, field, string)  \
+{                                                           \
+	/* hdr */                                               \
+	{                                                       \
+		&cmdline_token_string_ops,      /* ops */           \
+		offsetof(structure, field),     /* offset */        \
+	},                                                      \
+	/* string_data */                                       \
+	{                                                       \
+		string,                         /* str */           \
+	},                                                      \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PARSE_STRING_H_ */
diff --git a/lib/librte_cmdline/cmdline_rdline.c b/lib/librte_cmdline/cmdline_rdline.c
new file mode 100644
index 00000000..1ef2258d
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_rdline.c
@@ -0,0 +1,697 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <ctype.h>
+
+#include "cmdline_cirbuf.h"
+#include "cmdline_rdline.h"
+
+static void rdline_puts(struct rdline *rdl, const char *buf);
+static void rdline_miniprintf(struct rdline *rdl,
+			      const char *buf, unsigned int val);
+
+static void rdline_remove_old_history_item(struct rdline *rdl);
+static void rdline_remove_first_history_item(struct rdline *rdl);
+static unsigned int rdline_get_history_size(struct rdline *rdl);
+
+
+/* isblank() needs _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE, so use our
+ * own. */
+static int
+isblank2(char c)
+{
+	if (c == ' ' ||
+	    c == '\t' )
+		return 1;
+	return 0;
+}
+
+int
+rdline_init(struct rdline *rdl,
+		 rdline_write_char_t *write_char,
+		 rdline_validate_t *validate,
+		 rdline_complete_t *complete)
+{
+	if (!rdl || !write_char || !validate || !complete)
+		return -EINVAL;
+	memset(rdl, 0, sizeof(*rdl));
+	rdl->validate = validate;
+	rdl->complete = complete;
+	rdl->write_char = write_char;
+	rdl->status = RDLINE_INIT;
+	return cirbuf_init(&rdl->history, rdl->history_buf, 0, RDLINE_HISTORY_BUF_SIZE);
+}
+
+void
+rdline_newline(struct rdline *rdl, const char *prompt)
+{
+	unsigned int i;
+
+	if (!rdl || !prompt)
+		return;
+
+	vt100_init(&rdl->vt100);
+	cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE);
+	cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE);
+
+	rdl->prompt_size = strnlen(prompt, RDLINE_PROMPT_SIZE-1);
+	if (prompt != rdl->prompt)
+		memcpy(rdl->prompt, prompt, rdl->prompt_size);
+	rdl->prompt[RDLINE_PROMPT_SIZE-1] = '\0';
+
+	for (i=0 ; i<rdl->prompt_size ; i++)
+		rdl->write_char(rdl, rdl->prompt[i]);
+	rdl->status = RDLINE_RUNNING;
+
+	rdl->history_cur_line = -1;
+}
+
+void
+rdline_stop(struct rdline *rdl)
+{
+	if (!rdl)
+		return;
+	rdl->status = RDLINE_INIT;
+}
+
+void
+rdline_quit(struct rdline *rdl)
+{
+	if (!rdl)
+		return;
+	rdl->status = RDLINE_EXITED;
+}
+
+void
+rdline_restart(struct rdline *rdl)
+{
+	if (!rdl)
+		return;
+	rdl->status = RDLINE_RUNNING;
+}
+
+void
+rdline_reset(struct rdline *rdl)
+{
+	if (!rdl)
+		return;
+	vt100_init(&rdl->vt100);
+	cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE);
+	cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE);
+
+	rdl->status = RDLINE_RUNNING;
+
+	rdl->history_cur_line = -1;
+}
+
+const char *
+rdline_get_buffer(struct rdline *rdl)
+{
+	if (!rdl)
+		return NULL;
+	unsigned int len_l, len_r;
+	cirbuf_align_left(&rdl->left);
+	cirbuf_align_left(&rdl->right);
+
+	len_l = CIRBUF_GET_LEN(&rdl->left);
+	len_r = CIRBUF_GET_LEN(&rdl->right);
+	memcpy(rdl->left_buf+len_l, rdl->right_buf, len_r);
+
+	rdl->left_buf[len_l + len_r] = '\n';
+	rdl->left_buf[len_l + len_r + 1] = '\0';
+	return rdl->left_buf;
+}
+
+static void
+display_right_buffer(struct rdline *rdl, int force)
+{
+	unsigned int i;
+	char tmp;
+
+	if (!force && CIRBUF_IS_EMPTY(&rdl->right))
+		return;
+
+	rdline_puts(rdl, vt100_clear_right);
+	CIRBUF_FOREACH(&rdl->right, i, tmp) {
+		rdl->write_char(rdl, tmp);
+	}
+	if (!CIRBUF_IS_EMPTY(&rdl->right))
+		rdline_miniprintf(rdl, vt100_multi_left,
+				  CIRBUF_GET_LEN(&rdl->right));
+}
+
+void
+rdline_redisplay(struct rdline *rdl)
+{
+	unsigned int i;
+	char tmp;
+
+	if (!rdl)
+		return;
+
+	rdline_puts(rdl, vt100_home);
+	for (i=0 ; i<rdl->prompt_size ; i++)
+		rdl->write_char(rdl, rdl->prompt[i]);
+	CIRBUF_FOREACH(&rdl->left, i, tmp) {
+		rdl->write_char(rdl, tmp);
+	}
+	display_right_buffer(rdl, 1);
+}
+
+int
+rdline_char_in(struct rdline *rdl, char c)
+{
+	unsigned int i;
+	int cmd;
+	char tmp;
+	char *buf;
+
+	if (!rdl)
+		return -EINVAL;
+
+	if (rdl->status == RDLINE_EXITED)
+		return RDLINE_RES_EXITED;
+	if (rdl->status != RDLINE_RUNNING)
+		return RDLINE_RES_NOT_RUNNING;
+
+	cmd = vt100_parser(&rdl->vt100, c);
+	if (cmd == -2)
+		return RDLINE_RES_SUCCESS;
+
+	if (cmd >= 0) {
+		switch (cmd) {
+		/* move caret 1 char to the left */
+		case CMDLINE_KEY_CTRL_B:
+		case CMDLINE_KEY_LEFT_ARR:
+			if (CIRBUF_IS_EMPTY(&rdl->left))
+				break;
+			tmp = cirbuf_get_tail(&rdl->left);
+			cirbuf_del_tail(&rdl->left);
+			cirbuf_add_head(&rdl->right, tmp);
+			rdline_puts(rdl, vt100_left_arr);
+			break;
+
+		/* move caret 1 char to the right */
+		case CMDLINE_KEY_CTRL_F:
+		case CMDLINE_KEY_RIGHT_ARR:
+			if (CIRBUF_IS_EMPTY(&rdl->right))
+				break;
+			tmp = cirbuf_get_head(&rdl->right);
+			cirbuf_del_head(&rdl->right);
+			cirbuf_add_tail(&rdl->left, tmp);
+			rdline_puts(rdl, vt100_right_arr);
+			break;
+
+		/* move caret 1 word to the left */
+		/* keyboard equivalent: Alt+B */
+		case CMDLINE_KEY_WLEFT:
+			while (! CIRBUF_IS_EMPTY(&rdl->left) &&
+			       (tmp = cirbuf_get_tail(&rdl->left)) &&
+			       isblank2(tmp)) {
+				rdline_puts(rdl, vt100_left_arr);
+				cirbuf_del_tail(&rdl->left);
+				cirbuf_add_head(&rdl->right, tmp);
+			}
+			while (! CIRBUF_IS_EMPTY(&rdl->left) &&
+			       (tmp = cirbuf_get_tail(&rdl->left)) &&
+			       !isblank2(tmp)) {
+				rdline_puts(rdl, vt100_left_arr);
+				cirbuf_del_tail(&rdl->left);
+				cirbuf_add_head(&rdl->right, tmp);
+			}
+			break;
+
+		/* move caret 1 word to the right */
+		/* keyboard equivalent: Alt+F */
+		case CMDLINE_KEY_WRIGHT:
+			while (! CIRBUF_IS_EMPTY(&rdl->right) &&
+			       (tmp = cirbuf_get_head(&rdl->right)) &&
+			       isblank2(tmp)) {
+				rdline_puts(rdl, vt100_right_arr);
+				cirbuf_del_head(&rdl->right);
+				cirbuf_add_tail(&rdl->left, tmp);
+			}
+			while (! CIRBUF_IS_EMPTY(&rdl->right) &&
+			       (tmp = cirbuf_get_head(&rdl->right)) &&
+			       !isblank2(tmp)) {
+				rdline_puts(rdl, vt100_right_arr);
+				cirbuf_del_head(&rdl->right);
+				cirbuf_add_tail(&rdl->left, tmp);
+			}
+			break;
+
+		/* move caret to the left */
+		case CMDLINE_KEY_CTRL_A:
+			if (CIRBUF_IS_EMPTY(&rdl->left))
+				break;
+			rdline_miniprintf(rdl, vt100_multi_left,
+						CIRBUF_GET_LEN(&rdl->left));
+			while (! CIRBUF_IS_EMPTY(&rdl->left)) {
+				tmp = cirbuf_get_tail(&rdl->left);
+				cirbuf_del_tail(&rdl->left);
+				cirbuf_add_head(&rdl->right, tmp);
+			}
+			break;
+
+		/* move caret to the right */
+		case CMDLINE_KEY_CTRL_E:
+			if (CIRBUF_IS_EMPTY(&rdl->right))
+				break;
+			rdline_miniprintf(rdl, vt100_multi_right,
+						CIRBUF_GET_LEN(&rdl->right));
+			while (! CIRBUF_IS_EMPTY(&rdl->right)) {
+				tmp = cirbuf_get_head(&rdl->right);
+				cirbuf_del_head(&rdl->right);
+				cirbuf_add_tail(&rdl->left, tmp);
+			}
+			break;
+
+		/* delete 1 char from the left */
+		case CMDLINE_KEY_BKSPACE:
+			if(!cirbuf_del_tail_safe(&rdl->left)) {
+				rdline_puts(rdl, vt100_bs);
+				display_right_buffer(rdl, 1);
+			}
+			break;
+
+		/* delete 1 char from the right */
+		case CMDLINE_KEY_SUPPR:
+		case CMDLINE_KEY_CTRL_D:
+			if (cmd == CMDLINE_KEY_CTRL_D &&
+			    CIRBUF_IS_EMPTY(&rdl->left) &&
+			    CIRBUF_IS_EMPTY(&rdl->right)) {
+				return RDLINE_RES_EOF;
+			}
+			if (!cirbuf_del_head_safe(&rdl->right)) {
+				display_right_buffer(rdl, 1);
+			}
+			break;
+
+		/* delete 1 word from the left */
+		case CMDLINE_KEY_META_BKSPACE:
+		case CMDLINE_KEY_CTRL_W:
+			while (! CIRBUF_IS_EMPTY(&rdl->left) && isblank2(cirbuf_get_tail(&rdl->left))) {
+				rdline_puts(rdl, vt100_bs);
+				cirbuf_del_tail(&rdl->left);
+			}
+			while (! CIRBUF_IS_EMPTY(&rdl->left) && !isblank2(cirbuf_get_tail(&rdl->left))) {
+				rdline_puts(rdl, vt100_bs);
+				cirbuf_del_tail(&rdl->left);
+			}
+			display_right_buffer(rdl, 1);
+			break;
+
+		/* delete 1 word from the right */
+		case CMDLINE_KEY_META_D:
+			while (! CIRBUF_IS_EMPTY(&rdl->right) && isblank2(cirbuf_get_head(&rdl->right)))
+				cirbuf_del_head(&rdl->right);
+			while (! CIRBUF_IS_EMPTY(&rdl->right) && !isblank2(cirbuf_get_head(&rdl->right)))
+				cirbuf_del_head(&rdl->right);
+			display_right_buffer(rdl, 1);
+			break;
+
+		/* set kill buffer to contents on the right side of caret */
+		case CMDLINE_KEY_CTRL_K:
+			cirbuf_get_buf_head(&rdl->right, rdl->kill_buf, RDLINE_BUF_SIZE);
+			rdl->kill_size = CIRBUF_GET_LEN(&rdl->right);
+			cirbuf_del_buf_head(&rdl->right, rdl->kill_size);
+			rdline_puts(rdl, vt100_clear_right);
+			break;
+
+		/* paste contents of kill buffer to the left side of caret */
+		case CMDLINE_KEY_CTRL_Y:
+			i=0;
+			while(CIRBUF_GET_LEN(&rdl->right) + CIRBUF_GET_LEN(&rdl->left) <
+			      RDLINE_BUF_SIZE &&
+			      i < rdl->kill_size) {
+				cirbuf_add_tail(&rdl->left, rdl->kill_buf[i]);
+				rdl->write_char(rdl, rdl->kill_buf[i]);
+				i++;
+			}
+			display_right_buffer(rdl, 0);
+			break;
+
+		/* clear and newline */
+		case CMDLINE_KEY_CTRL_C:
+			rdline_puts(rdl, "\r\n");
+			rdline_newline(rdl, rdl->prompt);
+			break;
+
+		/* redisplay (helps when prompt is lost in other output) */
+		case CMDLINE_KEY_CTRL_L:
+			rdline_redisplay(rdl);
+			break;
+
+		/* autocomplete */
+		case CMDLINE_KEY_TAB:
+		case CMDLINE_KEY_HELP:
+			cirbuf_align_left(&rdl->left);
+			rdl->left_buf[CIRBUF_GET_LEN(&rdl->left)] = '\0';
+			if (rdl->complete) {
+				char tmp_buf[BUFSIZ];
+				int complete_state;
+				int ret;
+				unsigned int tmp_size;
+
+				if (cmd == CMDLINE_KEY_TAB)
+					complete_state = 0;
+				else
+					complete_state = -1;
+
+				/* see in parse.h for help on complete() */
+				ret = rdl->complete(rdl, rdl->left_buf,
+						    tmp_buf, sizeof(tmp_buf),
+						    &complete_state);
+				/* no completion or error */
+				if (ret <= 0) {
+					return RDLINE_RES_COMPLETE;
+				}
+
+				tmp_size = strnlen(tmp_buf, sizeof(tmp_buf));
+				/* add chars */
+				if (ret == RDLINE_RES_COMPLETE) {
+					i=0;
+					while(CIRBUF_GET_LEN(&rdl->right) + CIRBUF_GET_LEN(&rdl->left) <
+					      RDLINE_BUF_SIZE &&
+					      i < tmp_size) {
+						cirbuf_add_tail(&rdl->left, tmp_buf[i]);
+						rdl->write_char(rdl, tmp_buf[i]);
+						i++;
+					}
+					display_right_buffer(rdl, 1);
+					return RDLINE_RES_COMPLETE; /* ?? */
+				}
+
+				/* choice */
+				rdline_puts(rdl, "\r\n");
+				while (ret) {
+					rdl->write_char(rdl, ' ');
+					for (i=0 ; tmp_buf[i] ; i++)
+						rdl->write_char(rdl, tmp_buf[i]);
+					rdline_puts(rdl, "\r\n");
+					ret = rdl->complete(rdl, rdl->left_buf,
+							    tmp_buf, sizeof(tmp_buf),
+							    &complete_state);
+				}
+
+				rdline_redisplay(rdl);
+			}
+			return RDLINE_RES_COMPLETE;
+
+		/* complete buffer */
+		case CMDLINE_KEY_RETURN:
+		case CMDLINE_KEY_RETURN2:
+			rdline_get_buffer(rdl);
+			rdl->status = RDLINE_INIT;
+			rdline_puts(rdl, "\r\n");
+			if (rdl->history_cur_line != -1)
+				rdline_remove_first_history_item(rdl);
+
+			if (rdl->validate)
+				rdl->validate(rdl, rdl->left_buf, CIRBUF_GET_LEN(&rdl->left)+2);
+			/* user may have stopped rdline */
+			if (rdl->status == RDLINE_EXITED)
+				return RDLINE_RES_EXITED;
+			return RDLINE_RES_VALIDATED;
+
+		/* previous element in history */
+		case CMDLINE_KEY_UP_ARR:
+		case CMDLINE_KEY_CTRL_P:
+			if (rdl->history_cur_line == 0) {
+				rdline_remove_first_history_item(rdl);
+			}
+			if (rdl->history_cur_line <= 0) {
+				rdline_add_history(rdl, rdline_get_buffer(rdl));
+				rdl->history_cur_line = 0;
+			}
+
+			buf = rdline_get_history_item(rdl, rdl->history_cur_line + 1);
+			if (!buf)
+				break;
+
+			rdl->history_cur_line ++;
+			vt100_init(&rdl->vt100);
+			cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE);
+			cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE);
+			cirbuf_add_buf_tail(&rdl->left, buf, strnlen(buf, RDLINE_BUF_SIZE));
+			rdline_redisplay(rdl);
+			break;
+
+		/* next element in history */
+		case CMDLINE_KEY_DOWN_ARR:
+		case CMDLINE_KEY_CTRL_N:
+			if (rdl->history_cur_line - 1 < 0)
+				break;
+
+			rdl->history_cur_line --;
+			buf = rdline_get_history_item(rdl, rdl->history_cur_line);
+			if (!buf)
+				break;
+			vt100_init(&rdl->vt100);
+			cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE);
+			cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE);
+			cirbuf_add_buf_tail(&rdl->left, buf, strnlen(buf, RDLINE_BUF_SIZE));
+			rdline_redisplay(rdl);
+
+			break;
+
+
+		default:
+			break;
+		}
+
+		return RDLINE_RES_SUCCESS;
+	}
+
+	if (!isprint((int)c))
+		return RDLINE_RES_SUCCESS;
+
+	/* standard chars */
+	if (CIRBUF_GET_LEN(&rdl->left) + CIRBUF_GET_LEN(&rdl->right) >= RDLINE_BUF_SIZE)
+		return RDLINE_RES_SUCCESS;
+
+	if (cirbuf_add_tail_safe(&rdl->left, c))
+		return RDLINE_RES_SUCCESS;
+
+	rdl->write_char(rdl, c);
+	display_right_buffer(rdl, 0);
+
+	return RDLINE_RES_SUCCESS;
+}
+
+
+/* HISTORY */
+
+static void
+rdline_remove_old_history_item(struct rdline * rdl)
+{
+	char tmp;
+
+	while (! CIRBUF_IS_EMPTY(&rdl->history) ) {
+		tmp = cirbuf_get_head(&rdl->history);
+		cirbuf_del_head(&rdl->history);
+		if (!tmp)
+			break;
+	}
+}
+
+static void
+rdline_remove_first_history_item(struct rdline * rdl)
+{
+	char tmp;
+
+	if ( CIRBUF_IS_EMPTY(&rdl->history) ) {
+		return;
+	}
+	else {
+		cirbuf_del_tail(&rdl->history);
+	}
+
+	while (! CIRBUF_IS_EMPTY(&rdl->history) ) {
+		tmp = cirbuf_get_tail(&rdl->history);
+		if (!tmp)
+			break;
+		cirbuf_del_tail(&rdl->history);
+	}
+}
+
+static unsigned int
+rdline_get_history_size(struct rdline * rdl)
+{
+	unsigned int i, tmp, ret=0;
+
+	CIRBUF_FOREACH(&rdl->history, i, tmp) {
+		if (tmp == 0)
+			ret ++;
+	}
+
+	return ret;
+}
+
+char *
+rdline_get_history_item(struct rdline * rdl, unsigned int idx)
+{
+	unsigned int len, i, tmp;
+
+	if (!rdl)
+		return NULL;
+
+	len = rdline_get_history_size(rdl);
+	if ( idx >= len ) {
+		return NULL;
+	}
+
+	cirbuf_align_left(&rdl->history);
+
+	CIRBUF_FOREACH(&rdl->history, i, tmp) {
+		if ( idx == len - 1) {
+			return rdl->history_buf + i;
+		}
+		if (tmp == 0)
+			len --;
+	}
+
+	return NULL;
+}
+
+int
+rdline_add_history(struct rdline * rdl, const char * buf)
+{
+	unsigned int len, i;
+
+	if (!rdl || !buf)
+		return -EINVAL;
+
+	len = strnlen(buf, RDLINE_BUF_SIZE);
+	for (i=0; i<len ; i++) {
+		if (buf[i] == '\n') {
+			len = i;
+			break;
+		}
+	}
+
+	if ( len >= RDLINE_HISTORY_BUF_SIZE )
+		return -1;
+
+	while ( len >= CIRBUF_GET_FREELEN(&rdl->history) ) {
+		rdline_remove_old_history_item(rdl);
+	}
+
+	cirbuf_add_buf_tail(&rdl->history, buf, len);
+	cirbuf_add_tail(&rdl->history, 0);
+
+	return 0;
+}
+
+void
+rdline_clear_history(struct rdline * rdl)
+{
+	if (!rdl)
+		return;
+	cirbuf_init(&rdl->history, rdl->history_buf, 0, RDLINE_HISTORY_BUF_SIZE);
+}
+
+
+/* STATIC USEFUL FUNCS */
+
+static void
+rdline_puts(struct rdline * rdl, const char * buf)
+{
+	char c;
+	while ( (c = *(buf++)) != '\0' ) {
+		rdl->write_char(rdl, c);
+	}
+}
+
+/* a very very basic printf with one arg and one format 'u' */
+static void
+rdline_miniprintf(struct rdline *rdl, const char * buf, unsigned int val)
+{
+	char c, started=0, div=100;
+
+	while ( (c=*(buf++)) ) {
+		if (c != '%') {
+			rdl->write_char(rdl, c);
+			continue;
+		}
+		c = *(buf++);
+		if (c != 'u') {
+			rdl->write_char(rdl, '%');
+			rdl->write_char(rdl, c);
+			continue;
+		}
+		/* val is never more than 255 */
+		while (div) {
+			c = (char)(val / div);
+			if (c || started) {
+				rdl->write_char(rdl, (char)(c+'0'));
+				started = 1;
+			}
+			val %= div;
+			div /= 10;
+		}
+	}
+}
diff --git a/lib/librte_cmdline/cmdline_rdline.h b/lib/librte_cmdline/cmdline_rdline.h
new file mode 100644
index 00000000..72e2dad8
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_rdline.h
@@ -0,0 +1,255 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RDLINE_H_
+#define _RDLINE_H_
+
+/**
+ * This file is a small equivalent to the GNU readline library, but it
+ * was originally designed for small systems, like Atmel AVR
+ * microcontrollers (8 bits). Indeed, we don't use any malloc that is
+ * sometimes not implemented (or just not recommended) on such
+ * systems.
+ *
+ * Obviously, it does not support as many things as the GNU readline,
+ * but at least it supports some interesting features like a kill
+ * buffer and a command history.
+ *
+ * It also have a feature that does not have the GNU readline (as far
+ * as I know): we can have several instances of it running at the same
+ * time, even on a monothread program, since it works with callbacks.
+ *
+ * The lib is designed for a client-side or a server-side use:
+ * - server-side: the server receives all data from a socket, including
+ *   control chars, like arrows, tabulations, ... The client is
+ *   very simple, it can be a telnet or a minicom through a serial line.
+ * - client-side: the client receives its data through its stdin for
+ *   instance.
+ */
+
+#include <stdio.h>
+#include <cmdline_cirbuf.h>
+#include <cmdline_vt100.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* configuration */
+#define RDLINE_BUF_SIZE 512
+#define RDLINE_PROMPT_SIZE  32
+#define RDLINE_VT100_BUF_SIZE  8
+#define RDLINE_HISTORY_BUF_SIZE BUFSIZ
+#define RDLINE_HISTORY_MAX_LINE 64
+
+enum rdline_status {
+	RDLINE_INIT,
+	RDLINE_RUNNING,
+	RDLINE_EXITED
+};
+
+struct rdline;
+
+typedef int (rdline_write_char_t)(struct rdline *rdl, char);
+typedef void (rdline_validate_t)(struct rdline *rdl,
+				 const char *buf, unsigned int size);
+typedef int (rdline_complete_t)(struct rdline *rdl, const char *buf,
+				char *dstbuf, unsigned int dstsize,
+				int *state);
+
+struct rdline {
+	enum rdline_status status;
+	/* rdline bufs */
+	struct cirbuf left;
+	struct cirbuf right;
+	char left_buf[RDLINE_BUF_SIZE+2]; /* reserve 2 chars for the \n\0 */
+	char right_buf[RDLINE_BUF_SIZE];
+
+	char prompt[RDLINE_PROMPT_SIZE];
+	unsigned int prompt_size;
+
+	char kill_buf[RDLINE_BUF_SIZE];
+	unsigned int kill_size;
+
+	/* history */
+	struct cirbuf history;
+	char history_buf[RDLINE_HISTORY_BUF_SIZE];
+	int history_cur_line;
+
+	/* callbacks and func pointers */
+	rdline_write_char_t *write_char;
+	rdline_validate_t *validate;
+	rdline_complete_t *complete;
+
+	/* vt100 parser */
+	struct cmdline_vt100 vt100;
+
+	/* opaque pointer */
+	void *opaque;
+};
+
+/**
+ * Init fields for a struct rdline. Call this only once at the beginning
+ * of your program.
+ * \param rdl A pointer to an uninitialized struct rdline
+ * \param write_char The function used by the function to write a character
+ * \param validate A pointer to the function to execute when the
+ *                 user validates the buffer.
+ * \param complete A pointer to the function to execute when the
+ *                 user completes the buffer.
+ */
+int rdline_init(struct rdline *rdl,
+		 rdline_write_char_t *write_char,
+		 rdline_validate_t *validate,
+		 rdline_complete_t *complete);
+
+
+/**
+ * Init the current buffer, and display a prompt.
+ * \param rdl A pointer to a struct rdline
+ * \param prompt A string containing the prompt
+ */
+void rdline_newline(struct rdline *rdl, const char *prompt);
+
+/**
+ * Call it and all received chars will be ignored.
+ * \param rdl A pointer to a struct rdline
+ */
+void rdline_stop(struct rdline *rdl);
+
+/**
+ * Same than rdline_stop() except that next calls to rdline_char_in()
+ * will return RDLINE_RES_EXITED.
+ * \param rdl A pointer to a struct rdline
+ */
+void rdline_quit(struct rdline *rdl);
+
+/**
+ * Restart after a call to rdline_stop() or rdline_quit()
+ * \param rdl A pointer to a struct rdline
+ */
+void rdline_restart(struct rdline *rdl);
+
+/**
+ * Redisplay the current buffer
+ * \param rdl A pointer to a struct rdline
+ */
+void rdline_redisplay(struct rdline *rdl);
+
+/**
+ * Reset the current buffer and setup for a new line.
+ *  \param rdl A pointer to a struct rdline
+ */
+void rdline_reset(struct rdline *rdl);
+
+
+/* return status for rdline_char_in() */
+#define RDLINE_RES_SUCCESS       0
+#define RDLINE_RES_VALIDATED     1
+#define RDLINE_RES_COMPLETE      2
+#define RDLINE_RES_NOT_RUNNING  -1
+#define RDLINE_RES_EOF          -2
+#define RDLINE_RES_EXITED       -3
+
+/**
+ * append a char to the readline buffer.
+ * Return RDLINE_RES_VALIDATE when the line has been validated.
+ * Return RDLINE_RES_COMPLETE when the user asked to complete the buffer.
+ * Return RDLINE_RES_NOT_RUNNING if it is not running.
+ * Return RDLINE_RES_EOF if EOF (ctrl-d on an empty line).
+ * Else return RDLINE_RES_SUCCESS.
+ * XXX error case when the buffer is full ?
+ *
+ * \param rdl A pointer to a struct rdline
+ * \param c The character to append
+ */
+int rdline_char_in(struct rdline *rdl, char c);
+
+/**
+ * Return the current buffer, terminated by '\0'.
+ * \param rdl A pointer to a struct rdline
+ */
+const char *rdline_get_buffer(struct rdline *rdl);
+
+
+/**
+ * Add the buffer to history.
+ * return < 0 on error.
+ * \param rdl A pointer to a struct rdline
+ * \param buf A buffer that is terminated by '\0'
+ */
+int rdline_add_history(struct rdline *rdl, const char *buf);
+
+/**
+ * Clear current history
+ * \param rdl A pointer to a struct rdline
+ */
+void rdline_clear_history(struct rdline *rdl);
+
+/**
+ * Get the i-th history item
+ */
+char *rdline_get_history_item(struct rdline *rdl, unsigned int i);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RDLINE_H_ */
diff --git a/lib/librte_cmdline/cmdline_socket.c b/lib/librte_cmdline/cmdline_socket.c
new file mode 100644
index 00000000..3fc243b7
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_socket.c
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <termios.h>
+
+#include "cmdline_parse.h"
+#include "cmdline_rdline.h"
+#include "cmdline_socket.h"
+#include "cmdline.h"
+
+struct cmdline *
+cmdline_file_new(cmdline_parse_ctx_t *ctx, const char *prompt, const char *path)
+{
+	int fd;
+
+	/* everything else is checked in cmdline_new() */
+	if (!path)
+		return NULL;
+
+	fd = open(path, O_RDONLY, 0);
+	if (fd < 0) {
+		dprintf("open() failed\n");
+		return NULL;
+	}
+	return cmdline_new(ctx, prompt, fd, -1);
+}
+
+struct cmdline *
+cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char *prompt)
+{
+	struct cmdline *cl;
+	struct termios oldterm, term;
+
+	tcgetattr(0, &oldterm);
+	memcpy(&term, &oldterm, sizeof(term));
+	term.c_lflag &= ~(ICANON | ECHO | ISIG);
+	tcsetattr(0, TCSANOW, &term);
+	setbuf(stdin, NULL);
+
+	cl = cmdline_new(ctx, prompt, 0, 1);
+
+	if (cl)
+		memcpy(&cl->oldterm, &oldterm, sizeof(term));
+
+	return cl;
+}
+
+void
+cmdline_stdin_exit(struct cmdline *cl)
+{
+	if (!cl)
+		return;
+
+	tcsetattr(fileno(stdin), TCSANOW, &cl->oldterm);
+}
diff --git a/lib/librte_cmdline/cmdline_socket.h b/lib/librte_cmdline/cmdline_socket.h
new file mode 100644
index 00000000..8cc2dfbc
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_socket.h
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CMDLINE_SOCKET_H_
+#define _CMDLINE_SOCKET_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct cmdline *cmdline_file_new(cmdline_parse_ctx_t *ctx, const char *prompt, const char *path);
+struct cmdline *cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char *prompt);
+void cmdline_stdin_exit(struct cmdline *cl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CMDLINE_SOCKET_H_ */
diff --git a/lib/librte_cmdline/cmdline_vt100.c b/lib/librte_cmdline/cmdline_vt100.c
new file mode 100644
index 00000000..a253e8b6
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_vt100.c
@@ -0,0 +1,185 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <termios.h>
+
+#include "cmdline_vt100.h"
+
+const char *cmdline_vt100_commands[] = {
+	vt100_up_arr,
+	vt100_down_arr,
+	vt100_right_arr,
+	vt100_left_arr,
+	"\177",
+	"\n",
+	"\001",
+	"\005",
+	"\013",
+	"\031",
+	"\003",
+	"\006",
+	"\002",
+	vt100_suppr,
+	vt100_tab,
+	"\004",
+	"\014",
+	"\r",
+	"\033\177",
+	vt100_word_left,
+	vt100_word_right,
+	"?",
+	"\027",
+	"\020",
+	"\016",
+	"\033\144",
+};
+
+void
+vt100_init(struct cmdline_vt100 *vt)
+{
+	if (!vt)
+		return;
+	vt->state = CMDLINE_VT100_INIT;
+}
+
+
+static int
+match_command(char *buf, unsigned int size)
+{
+	const char *cmd;
+	size_t cmdlen;
+	unsigned int i = 0;
+
+	for (i=0 ; i<sizeof(cmdline_vt100_commands)/sizeof(const char *) ; i++) {
+		cmd = *(cmdline_vt100_commands + i);
+
+		cmdlen = strnlen(cmd, CMDLINE_VT100_BUF_SIZE);
+		if (size == cmdlen &&
+		    !strncmp(buf, cmd, cmdlen)) {
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+int
+vt100_parser(struct cmdline_vt100 *vt, char ch)
+{
+	unsigned int size;
+	uint8_t c = (uint8_t) ch;
+
+	if (!vt)
+		return -1;
+
+	if (vt->bufpos >= CMDLINE_VT100_BUF_SIZE) {
+		vt->state = CMDLINE_VT100_INIT;
+		vt->bufpos = 0;
+	}
+
+	vt->buf[vt->bufpos++] = c;
+	size = vt->bufpos;
+
+	switch (vt->state) {
+	case CMDLINE_VT100_INIT:
+		if (c == 033) {
+			vt->state = CMDLINE_VT100_ESCAPE;
+		}
+		else {
+			vt->bufpos = 0;
+			goto match_command;
+		}
+		break;
+
+	case CMDLINE_VT100_ESCAPE:
+		if (c == 0133) {
+			vt->state = CMDLINE_VT100_ESCAPE_CSI;
+		}
+		else if (c >= 060 && c <= 0177) { /* XXX 0177 ? */
+			vt->bufpos = 0;
+			vt->state = CMDLINE_VT100_INIT;
+			goto match_command;
+		}
+		break;
+
+	case CMDLINE_VT100_ESCAPE_CSI:
+		if (c >= 0100 && c <= 0176) {
+			vt->bufpos = 0;
+			vt->state = CMDLINE_VT100_INIT;
+			goto match_command;
+		}
+		break;
+
+	default:
+		vt->bufpos = 0;
+		break;
+	}
+
+	return -2;
+
+ match_command:
+	return match_command(vt->buf, size);
+}
diff --git a/lib/librte_cmdline/cmdline_vt100.h b/lib/librte_cmdline/cmdline_vt100.h
new file mode 100644
index 00000000..963add8d
--- /dev/null
+++ b/lib/librte_cmdline/cmdline_vt100.h
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the University of California, Berkeley nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CMDLINE_VT100_H_
+#define _CMDLINE_VT100_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define vt100_bell         "\007"
+#define vt100_bs           "\010"
+#define vt100_bs_clear     "\010 \010"
+#define vt100_tab          "\011"
+#define vt100_crnl         "\012\015"
+#define vt100_clear_right  "\033[0K"
+#define vt100_clear_left   "\033[1K"
+#define vt100_clear_down   "\033[0J"
+#define vt100_clear_up     "\033[1J"
+#define vt100_clear_line   "\033[2K"
+#define vt100_clear_screen "\033[2J"
+#define vt100_up_arr       "\033\133\101"
+#define vt100_down_arr     "\033\133\102"
+#define vt100_right_arr    "\033\133\103"
+#define vt100_left_arr     "\033\133\104"
+#define vt100_multi_right  "\033\133%uC"
+#define vt100_multi_left   "\033\133%uD"
+#define vt100_suppr        "\033\133\063\176"
+#define vt100_home         "\033M\033E"
+#define vt100_word_left    "\033\142"
+#define vt100_word_right   "\033\146"
+
+/* Result of parsing : it must be synchronized with
+ * cmdline_vt100_commands[] in vt100.c */
+#define CMDLINE_KEY_UP_ARR 0
+#define CMDLINE_KEY_DOWN_ARR 1
+#define CMDLINE_KEY_RIGHT_ARR 2
+#define CMDLINE_KEY_LEFT_ARR 3
+#define CMDLINE_KEY_BKSPACE 4
+#define CMDLINE_KEY_RETURN 5
+#define CMDLINE_KEY_CTRL_A 6
+#define CMDLINE_KEY_CTRL_E 7
+#define CMDLINE_KEY_CTRL_K 8
+#define CMDLINE_KEY_CTRL_Y 9
+#define CMDLINE_KEY_CTRL_C 10
+#define CMDLINE_KEY_CTRL_F 11
+#define CMDLINE_KEY_CTRL_B 12
+#define CMDLINE_KEY_SUPPR 13
+#define CMDLINE_KEY_TAB 14
+#define CMDLINE_KEY_CTRL_D 15
+#define CMDLINE_KEY_CTRL_L 16
+#define CMDLINE_KEY_RETURN2 17
+#define CMDLINE_KEY_META_BKSPACE 18
+#define CMDLINE_KEY_WLEFT 19
+#define CMDLINE_KEY_WRIGHT 20
+#define CMDLINE_KEY_HELP 21
+#define CMDLINE_KEY_CTRL_W 22
+#define CMDLINE_KEY_CTRL_P 23
+#define CMDLINE_KEY_CTRL_N 24
+#define CMDLINE_KEY_META_D 25
+
+extern const char *cmdline_vt100_commands[];
+
+enum cmdline_vt100_parser_state {
+	CMDLINE_VT100_INIT,
+	CMDLINE_VT100_ESCAPE,
+	CMDLINE_VT100_ESCAPE_CSI
+};
+
+#define CMDLINE_VT100_BUF_SIZE 8
+struct cmdline_vt100 {
+	uint8_t bufpos;
+	char buf[CMDLINE_VT100_BUF_SIZE];
+	enum cmdline_vt100_parser_state state;
+};
+
+/**
+ * Init
+ */
+void vt100_init(struct cmdline_vt100 *vt);
+
+/**
+ * Input a new character.
+ * Return -1 if the character is not part of a control sequence
+ * Return -2 if c is not the last char of a control sequence
+ * Else return the index in vt100_commands[]
+ */
+int vt100_parser(struct cmdline_vt100 *vt, char c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_cmdline/rte_cmdline_version.map b/lib/librte_cmdline/rte_cmdline_version.map
new file mode 100644
index 00000000..c9fc18ab
--- /dev/null
+++ b/lib/librte_cmdline/rte_cmdline_version.map
@@ -0,0 +1,78 @@
+DPDK_2.0 {
+	global:
+
+	cirbuf_add_buf_head;
+	cirbuf_add_buf_tail;
+	cirbuf_add_head;
+	cirbuf_add_head_safe;
+	cirbuf_add_tail;
+	cirbuf_add_tail_safe;
+	cirbuf_align_left;
+	cirbuf_align_right;
+	cirbuf_del_buf_head;
+	cirbuf_del_buf_tail;
+	cirbuf_del_head;
+	cirbuf_del_head_safe;
+	cirbuf_del_tail;
+	cirbuf_del_tail_safe;
+	cirbuf_get_buf_head;
+	cirbuf_get_buf_tail;
+	cirbuf_get_head;
+	cirbuf_get_tail;
+	cirbuf_init;
+	cmdline_complete;
+	cmdline_complete_get_elt_string;
+	cmdline_complete_get_nb_string;
+	cmdline_file_new;
+	cmdline_free;
+	cmdline_get_help_etheraddr;
+	cmdline_get_help_ipaddr;
+	cmdline_get_help_num;
+	cmdline_get_help_portlist;
+	cmdline_get_help_string;
+	cmdline_in;
+	cmdline_interact;
+	cmdline_isendoftoken;
+	cmdline_new;
+	cmdline_parse;
+	cmdline_parse_etheraddr;
+	cmdline_parse_ipaddr;
+	cmdline_parse_num;
+	cmdline_parse_portlist;
+	cmdline_parse_string;
+	cmdline_printf;
+	cmdline_quit;
+	cmdline_set_prompt;
+	cmdline_stdin_exit;
+	cmdline_stdin_new;
+	cmdline_token_etheraddr_ops;
+	cmdline_token_ipaddr_ops;
+	cmdline_token_num_ops;
+	cmdline_token_portlist_ops;
+	cmdline_token_string_ops;
+	cmdline_token_string_ops;
+	cmdline_write_char;
+	rdline_add_history;
+	rdline_char_in;
+	rdline_clear_history;
+	rdline_get_buffer;
+	rdline_get_history_item;
+	rdline_init;
+	rdline_newline;
+	rdline_quit;
+	rdline_redisplay;
+	rdline_reset;
+	rdline_restart;
+	rdline_stop;
+	vt100_init;
+	vt100_parser;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	cmdline_poll;
+
+} DPDK_2.0;
diff --git a/lib/librte_compat/Makefile b/lib/librte_compat/Makefile
new file mode 100644
index 00000000..0c57533c
--- /dev/null
+++ b/lib/librte_compat/Makefile
@@ -0,0 +1,40 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+
+LIBABIVER := 1
+
+# install includes
+SYMLINK-y-include := rte_compat.h
+
+include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_compat/rte_compat.h b/lib/librte_compat/rte_compat.h
new file mode 100644
index 00000000..1c3c8d52
--- /dev/null
+++ b/lib/librte_compat/rte_compat.h
@@ -0,0 +1,105 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Neil Horman <nhorman@tuxdriver.com>.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_COMPAT_H_
+#define _RTE_COMPAT_H_
+#include <rte_common.h>
+
+#ifdef RTE_BUILD_SHARED_LIB
+
+/*
+ * Provides backwards compatibility when updating exported functions.
+ * When a symol is exported from a library to provide an API, it also provides a
+ * calling convention (ABI) that is embodied in its name, return type,
+ * arguments, etc.  On occasion that function may need to change to accommodate
+ * new functionality, behavior, etc.  When that occurs, it is desireable to
+ * allow for backwards compatibility for a time with older binaries that are
+ * dynamically linked to the dpdk.  To support that, the __vsym and
+ * VERSION_SYMBOL macros are created.  They, in conjunction with the
+ * <library>_version.map file for a given library allow for multiple versions of
+ * a symbol to exist in a shared library so that older binaries need not be
+ * immediately recompiled.
+ *
+ * Refer to the guidelines document in the docs subdirectory for details on the
+ * use of these macros
+ */
+
+/*
+ * Macro Parameters:
+ * b - function base name
+ * e - function version extension, to be concatenated with base name
+ * n - function symbol version string to be applied
+ * f - function prototype
+ * p - full function symbol name
+ */
+
+/*
+ * VERSION_SYMBOL
+ * Creates a symbol version table entry binding symbol <b>@DPDK_<n> to the internal
+ * function name <b>_<e>
+ */
+#define VERSION_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@DPDK_" RTE_STR(n))
+
+/*
+ * BIND_DEFAULT_SYMBOL
+ * Creates a symbol version entry instructing the linker to bind references to
+ * symbol <b> to the internal symbol <b>_<e>
+ */
+#define BIND_DEFAULT_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@@DPDK_" RTE_STR(n))
+#define __vsym __attribute__((used))
+
+/*
+ * MAP_STATIC_SYMBOL
+ * If a function has been bifurcated into multiple versions, none of which
+ * are defined as the exported symbol name in the map file, this macro can be
+ * used to alias a specific version of the symbol to its exported name.  For
+ * example, if you have 2 versions of a function foo_v1 and foo_v2, where the
+ * former is mapped to foo@DPDK_1 and the latter is mapped to foo@DPDK_2 when
+ * building a shared library, this macro can be used to map either foo_v1 or
+ * foo_v2 to the symbol foo when building a static library, e.g.:
+ * MAP_STATIC_SYMBOL(void foo(), foo_v2);
+ */
+#define MAP_STATIC_SYMBOL(f, p)
+
+#else
+/*
+ * No symbol versioning in use
+ */
+#define VERSION_SYMBOL(b, e, n)
+#define __vsym
+#define BIND_DEFAULT_SYMBOL(b, e, n)
+#define MAP_STATIC_SYMBOL(f, p) f __attribute__((alias(RTE_STR(p))))
+/*
+ * RTE_BUILD_SHARED_LIB=n
+ */
+#endif
+
+
+#endif /* _RTE_COMPAT_H_ */
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
new file mode 100644
index 00000000..314a0466
--- /dev/null
+++ b/lib/librte_cryptodev/Makefile
@@ -0,0 +1,62 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2015 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_cryptodev.a
+
+# library version
+LIBABIVER := 1
+
+# build flags
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# library source files
+SRCS-y += rte_cryptodev.c
+
+# export include files
+SYMLINK-y-include += rte_crypto.h
+SYMLINK-y-include += rte_crypto_sym.h
+SYMLINK-y-include += rte_cryptodev.h
+SYMLINK-y-include += rte_cryptodev_pmd.h
+
+# versioning export map
+EXPORT_MAP := rte_cryptodev_version.map
+
+# library dependencies
+DEPDIRS-y += lib/librte_eal
+DEPDIRS-y += lib/librte_mempool
+DEPDIRS-y += lib/librte_ring
+DEPDIRS-y += lib/librte_mbuf
+DEPDIRS-y += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
new file mode 100644
index 00000000..5bc3eaa7
--- /dev/null
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -0,0 +1,415 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRYPTO_H_
+#define _RTE_CRYPTO_H_
+
+/**
+ * @file rte_crypto.h
+ *
+ * RTE Cryptography Common Definitions
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+
+#include "rte_crypto_sym.h"
+
+/** Crypto operation types */
+enum rte_crypto_op_type {
+	RTE_CRYPTO_OP_TYPE_UNDEFINED,
+	/**< Undefined operation type */
+	RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+	/**< Symmetric operation */
+};
+
+/** Status of crypto operation */
+enum rte_crypto_op_status {
+	RTE_CRYPTO_OP_STATUS_SUCCESS,
+	/**< Operation completed successfully */
+	RTE_CRYPTO_OP_STATUS_NOT_PROCESSED,
+	/**< Operation has not yet been processed by a crypto device */
+	RTE_CRYPTO_OP_STATUS_ENQUEUED,
+	/**< Operation is enqueued on device */
+	RTE_CRYPTO_OP_STATUS_AUTH_FAILED,
+	/**< Authentication verification failed */
+	RTE_CRYPTO_OP_STATUS_INVALID_SESSION,
+	/**<
+	 * Symmetric operation failed due to invalid session arguments, or if
+	 * in session-less mode, failed to allocate private operation material.
+	 */
+	RTE_CRYPTO_OP_STATUS_INVALID_ARGS,
+	/**< Operation failed due to invalid arguments in request */
+	RTE_CRYPTO_OP_STATUS_ERROR,
+	/**< Error handling operation */
+};
+
+/**
+ * Cryptographic Operation.
+ *
+ * This structure contains data relating to performing cryptographic
+ * operations. This operation structure is used to contain any operation which
+ * is supported by the cryptodev API, PMDs should check the type parameter to
+ * verify that the operation is a support function of the device. Crypto
+ * operations are enqueued and dequeued in crypto PMDs using the
+ * rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() .
+ */
+struct rte_crypto_op {
+	enum rte_crypto_op_type type;
+	/**< operation type */
+
+	enum rte_crypto_op_status status;
+	/**<
+	 * operation status - this is reset to
+	 * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and
+	 * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
+	 * is successfully processed by a crypto PMD
+	 */
+
+	struct rte_mempool *mempool;
+	/**< crypto operation mempool which operation is allocated from */
+
+	phys_addr_t phys_addr;
+	/**< physical address of crypto operation */
+
+	void *opaque_data;
+	/**< Opaque pointer for user data */
+
+	union {
+		struct rte_crypto_sym_op *sym;
+		/**< Symmetric operation parameters */
+	}; /**< operation specific parameters */
+} __rte_cache_aligned;
+
+/**
+ * Reset the fields of a crypto operation to their default values.
+ *
+ * @param	op	The crypto operation to be reset.
+ * @param	type	The crypto operation type.
+ */
+static inline void
+__rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type)
+{
+	op->type = type;
+	op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+	switch (type) {
+	case RTE_CRYPTO_OP_TYPE_SYMMETRIC:
+		/** Symmetric operation structure starts after the end of the
+		 * rte_crypto_op structure.
+		 */
+		op->sym = (struct rte_crypto_sym_op *)(op + 1);
+		op->type = type;
+
+		__rte_crypto_sym_op_reset(op->sym);
+		break;
+	default:
+		break;
+	}
+
+	op->opaque_data = NULL;
+}
+
+/**
+ * Private data structure belonging to a crypto symmetric operation pool.
+ */
+struct rte_crypto_op_pool_private {
+	enum rte_crypto_op_type type;
+	/**< Crypto op pool type operation. */
+	uint16_t priv_size;
+	/**< Size of private area in each crypto operation. */
+};
+
+
+/**
+ * Returns the size of private data allocated with each rte_crypto_op object by
+ * the mempool
+ *
+ * @param	mempool	rte_crypto_op mempool
+ *
+ * @return	private data size
+ */
+static inline uint16_t
+__rte_crypto_op_get_priv_data_size(struct rte_mempool *mempool)
+{
+	struct rte_crypto_op_pool_private *priv =
+		(struct rte_crypto_op_pool_private *) rte_mempool_get_priv(mempool);
+
+	return priv->priv_size;
+}
+
+
+/**
+ * Creates a crypto operation pool
+ *
+ * @param	name		pool name
+ * @param	type		crypto operation type, use
+ *				RTE_CRYPTO_OP_TYPE_UNDEFINED for a pool which
+ *				supports all operation types
+ * @param	nb_elts		number of elements in pool
+ * @param	cache_size	Number of elements to cache on lcore, see
+ *				*rte_mempool_create* for further details about
+ *				cache size
+ * @param	priv_size	Size of private data to allocate with each
+ *				operation
+ * @param	socket_id	Socket to allocate memory on
+ *
+ * @return
+ *  - On success pointer to mempool
+ *  - On failure NULL
+ */
+extern struct rte_mempool *
+rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type,
+		unsigned nb_elts, unsigned cache_size, uint16_t priv_size,
+		int socket_id);
+
+/**
+ * Bulk allocate raw element from mempool and return as crypto operations
+ *
+ * @param	mempool		crypto operation mempool.
+ * @param	type		crypto operation type.
+ * @param	ops		Array to place allocated crypto operations
+ * @param	nb_ops		Number of crypto operations to allocate
+ *
+ * @returns
+ * - On success returns  number of ops allocated
+ */
+static inline int
+__rte_crypto_op_raw_bulk_alloc(struct rte_mempool *mempool,
+		enum rte_crypto_op_type type,
+		struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+	struct rte_crypto_op_pool_private *priv;
+
+	priv = (struct rte_crypto_op_pool_private *) rte_mempool_get_priv(mempool);
+	if (unlikely(priv->type != type &&
+			priv->type != RTE_CRYPTO_OP_TYPE_UNDEFINED))
+		return -EINVAL;
+
+	if (rte_mempool_get_bulk(mempool, (void **)ops, nb_ops) == 0)
+		return nb_ops;
+
+	return 0;
+}
+
+/**
+ * Allocate a crypto operation from a mempool with default parameters set
+ *
+ * @param	mempool	crypto operation mempool
+ * @param	type	operation type to allocate
+ *
+ * @returns
+ * - On success returns a valid rte_crypto_op structure
+ * - On failure returns NULL
+ */
+static inline struct rte_crypto_op *
+rte_crypto_op_alloc(struct rte_mempool *mempool, enum rte_crypto_op_type type)
+{
+	struct rte_crypto_op *op = NULL;
+	int retval;
+
+	retval = __rte_crypto_op_raw_bulk_alloc(mempool, type, &op, 1);
+	if (unlikely(retval != 1))
+		return NULL;
+
+	__rte_crypto_op_reset(op, type);
+
+	return op;
+}
+
+
+/**
+ * Bulk allocate crypto operations from a mempool with default parameters set
+ *
+ * @param	mempool	crypto operation mempool
+ * @param	type	operation type to allocate
+ * @param	ops	Array to place allocated crypto operations
+ * @param	nb_ops	Number of crypto operations to allocate
+ *
+ * @returns
+ * - On success returns a valid rte_crypto_op structure
+ * - On failure returns NULL
+ */
+
+static inline unsigned
+rte_crypto_op_bulk_alloc(struct rte_mempool *mempool,
+		enum rte_crypto_op_type type,
+		struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+	int i;
+
+	if (unlikely(__rte_crypto_op_raw_bulk_alloc(mempool, type, ops, nb_ops)
+			!= nb_ops))
+		return 0;
+
+	for (i = 0; i < nb_ops; i++)
+		__rte_crypto_op_reset(ops[i], type);
+
+	return nb_ops;
+}
+
+
+
+/**
+ * Returns a pointer to the private data of a crypto operation if
+ * that operation has enough capacity for requested size.
+ *
+ * @param	op	crypto operation.
+ * @param	size	size of space requested in private data.
+ *
+ * @returns
+ * - if sufficient space available returns pointer to start of private data
+ * - if insufficient space returns NULL
+ */
+static inline void *
+__rte_crypto_op_get_priv_data(struct rte_crypto_op *op, uint32_t size)
+{
+	uint32_t priv_size;
+
+	if (likely(op->mempool != NULL)) {
+		priv_size = __rte_crypto_op_get_priv_data_size(op->mempool);
+
+		if (likely(priv_size >= size))
+			return (void *)((uint8_t *)(op + 1) +
+					sizeof(struct rte_crypto_sym_op));
+	}
+
+	return NULL;
+}
+
+/**
+ * free crypto operation structure
+ * If operation has been allocate from a rte_mempool, then the operation will
+ * be returned to the mempool.
+ *
+ * @param	op	symmetric crypto operation
+ */
+static inline void
+rte_crypto_op_free(struct rte_crypto_op *op)
+{
+	if (op != NULL && op->mempool != NULL)
+		rte_mempool_put(op->mempool, op);
+}
+
+/**
+ * Allocate a symmetric crypto operation in the private data of an mbuf.
+ *
+ * @param	m	mbuf which is associated with the crypto operation, the
+ *			operation will be allocated in the private data of that
+ *			mbuf.
+ *
+ * @returns
+ * - On success returns a pointer to the crypto operation.
+ * - On failure returns NULL.
+ */
+static inline struct rte_crypto_op *
+rte_crypto_sym_op_alloc_from_mbuf_priv_data(struct rte_mbuf *m)
+{
+	if (unlikely(m == NULL))
+		return NULL;
+
+	/*
+	 * check that the mbuf's private data size is sufficient to contain a
+	 * crypto operation
+	 */
+	if (unlikely(m->priv_size < (sizeof(struct rte_crypto_op) +
+			sizeof(struct rte_crypto_sym_op))))
+		return NULL;
+
+	/* private data starts immediately after the mbuf header in the mbuf. */
+	struct rte_crypto_op *op = (struct rte_crypto_op *)(m + 1);
+
+	__rte_crypto_op_reset(op, RTE_CRYPTO_OP_TYPE_SYMMETRIC);
+
+	op->mempool = NULL;
+	op->sym->m_src = m;
+
+	return op;
+}
+
+/**
+ * Allocate space for symmetric crypto xforms in the private data space of the
+ * crypto operation. This also defaults the crypto xform type and configures
+ * the chaining of the xforms in the crypto operation
+ *
+ * @return
+ * - On success returns pointer to first crypto xform in crypto operations chain
+ * - On failure returns NULL
+ */
+static inline struct rte_crypto_sym_xform *
+rte_crypto_op_sym_xforms_alloc(struct rte_crypto_op *op, uint8_t nb_xforms)
+{
+	void *priv_data;
+	uint32_t size;
+
+	if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC))
+		return NULL;
+
+	size = sizeof(struct rte_crypto_sym_xform) * nb_xforms;
+
+	priv_data = __rte_crypto_op_get_priv_data(op, size);
+	if (priv_data == NULL)
+		return NULL;
+
+	return __rte_crypto_sym_op_sym_xforms_alloc(op->sym, priv_data,
+			nb_xforms);
+}
+
+
+/**
+ * Attach a session to a crypto operation
+ *
+ * @param	op	crypto operation, must be of type symmetric
+ * @param	sess	cryptodev session
+ */
+static inline int
+rte_crypto_op_attach_sym_session(struct rte_crypto_op *op,
+		struct rte_cryptodev_sym_session *sess)
+{
+	if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC))
+		return -1;
+
+	return __rte_crypto_sym_op_attach_sym_session(op->sym, sess);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTO_H_ */
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
new file mode 100644
index 00000000..4ae9b9e8
--- /dev/null
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -0,0 +1,662 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRYPTO_SYM_H_
+#define _RTE_CRYPTO_SYM_H_
+
+/**
+ * @file rte_crypto_sym.h
+ *
+ * RTE Definitions for Symmetric Cryptography
+ *
+ * Defines symmetric cipher and authentication algorithms and modes, as well
+ * as supported symmetric crypto operation combinations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+
+
+/** Symmetric Cipher Algorithms */
+enum rte_crypto_cipher_algorithm {
+	RTE_CRYPTO_CIPHER_NULL = 1,
+	/**< NULL cipher algorithm. No mode applies to the NULL algorithm. */
+
+	RTE_CRYPTO_CIPHER_3DES_CBC,
+	/**< Triple DES algorithm in CBC mode */
+	RTE_CRYPTO_CIPHER_3DES_CTR,
+	/**< Triple DES algorithm in CTR mode */
+	RTE_CRYPTO_CIPHER_3DES_ECB,
+	/**< Triple DES algorithm in ECB mode */
+
+	RTE_CRYPTO_CIPHER_AES_CBC,
+	/**< AES algorithm in CBC mode */
+	RTE_CRYPTO_CIPHER_AES_CCM,
+	/**< AES algorithm in CCM mode. When this cipher algorithm is used the
+	 * *RTE_CRYPTO_AUTH_AES_CCM* element of the
+	 * *rte_crypto_hash_algorithm* enum MUST be used to set up the related
+	 * *rte_crypto_auth_xform* structure in the session context or in
+	 * the op_params of the crypto operation structure in the case of a
+	 * session-less crypto operation
+	 */
+	RTE_CRYPTO_CIPHER_AES_CTR,
+	/**< AES algorithm in Counter mode */
+	RTE_CRYPTO_CIPHER_AES_ECB,
+	/**< AES algorithm in ECB mode */
+	RTE_CRYPTO_CIPHER_AES_F8,
+	/**< AES algorithm in F8 mode */
+	RTE_CRYPTO_CIPHER_AES_GCM,
+	/**< AES algorithm in GCM mode. When this cipher algorithm is used the
+	 * *RTE_CRYPTO_AUTH_AES_GCM* element of the
+	 * *rte_crypto_auth_algorithm* enum MUST be used to set up the related
+	 * *rte_crypto_auth_setup_data* structure in the session context or in
+	 * the op_params of the crypto operation structure in the case of a
+	 * session-less crypto operation.
+	 */
+	RTE_CRYPTO_CIPHER_AES_XTS,
+	/**< AES algorithm in XTS mode */
+
+	RTE_CRYPTO_CIPHER_ARC4,
+	/**< (A)RC4 cipher algorithm */
+
+	RTE_CRYPTO_CIPHER_KASUMI_F8,
+	/**< Kasumi algorithm in F8 mode */
+
+	RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+	/**< SNOW3G algorithm in UEA2 mode */
+
+	RTE_CRYPTO_CIPHER_ZUC_EEA3,
+	/**< ZUC algorithm in EEA3 mode */
+
+	RTE_CRYPTO_CIPHER_LIST_END
+};
+
+/** Symmetric Cipher Direction */
+enum rte_crypto_cipher_operation {
+	RTE_CRYPTO_CIPHER_OP_ENCRYPT,
+	/**< Encrypt cipher operation */
+	RTE_CRYPTO_CIPHER_OP_DECRYPT
+	/**< Decrypt cipher operation */
+};
+
+/**
+ * Symmetric Cipher Setup Data.
+ *
+ * This structure contains data relating to Cipher (Encryption and Decryption)
+ *  use to create a session.
+ */
+struct rte_crypto_cipher_xform {
+	enum rte_crypto_cipher_operation op;
+	/**< This parameter determines if the cipher operation is an encrypt or
+	 * a decrypt operation. For the RC4 algorithm and the F8/CTR modes,
+	 * only encrypt operations are valid.
+	 */
+	enum rte_crypto_cipher_algorithm algo;
+	/**< Cipher algorithm */
+
+	struct {
+		uint8_t *data;	/**< pointer to key data */
+		size_t length;	/**< key length in bytes */
+	} key;
+	/**< Cipher key
+	 *
+	 * For the RTE_CRYPTO_CIPHER_AES_F8 mode of operation, key.data will
+	 * point to a concatenation of the AES encryption key followed by a
+	 * keymask. As per RFC3711, the keymask should be padded with trailing
+	 * bytes to match the length of the encryption key used.
+	 *
+	 * For AES-XTS mode of operation, two keys must be provided and
+	 * key.data must point to the two keys concatenated together (Key1 ||
+	 * Key2). The cipher key length will contain the total size of both
+	 * keys.
+	 *
+	 * Cipher key length is in bytes. For AES it can be 128 bits (16 bytes),
+	 * 192 bits (24 bytes) or 256 bits (32 bytes).
+	 *
+	 * For the CCM mode of operation, the only supported key length is 128
+	 * bits (16 bytes).
+	 *
+	 * For the RTE_CRYPTO_CIPHER_AES_F8 mode of operation, key.length
+	 * should be set to the combined length of the encryption key and the
+	 * keymask. Since the keymask and the encryption key are the same size,
+	 * key.length should be set to 2 x the AES encryption key length.
+	 *
+	 * For the AES-XTS mode of operation:
+	 *  - Two keys must be provided and key.length refers to total length of
+	 *    the two keys.
+	 *  - Each key can be either 128 bits (16 bytes) or 256 bits (32 bytes).
+	 *  - Both keys must have the same size.
+	 **/
+};
+
+/** Symmetric Authentication / Hash Algorithms */
+enum rte_crypto_auth_algorithm {
+	RTE_CRYPTO_AUTH_NULL = 1,
+	/**< NULL hash algorithm. */
+
+	RTE_CRYPTO_AUTH_AES_CBC_MAC,
+	/**< AES-CBC-MAC algorithm. Only 128-bit keys are supported. */
+	RTE_CRYPTO_AUTH_AES_CCM,
+	/**< AES algorithm in CCM mode. This is an authenticated cipher. When
+	 * this hash algorithm is used, the *RTE_CRYPTO_CIPHER_AES_CCM*
+	 * element of the *rte_crypto_cipher_algorithm* enum MUST be used to
+	 * set up the related rte_crypto_cipher_setup_data structure in the
+	 * session context or the corresponding parameter in the crypto
+	 * operation data structures op_params parameter MUST be set for a
+	 * session-less crypto operation.
+	 */
+	RTE_CRYPTO_AUTH_AES_CMAC,
+	/**< AES CMAC algorithm. */
+	RTE_CRYPTO_AUTH_AES_GCM,
+	/**< AES algorithm in GCM mode. When this hash algorithm
+	 * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the
+	 * rte_crypto_cipher_algorithm enum MUST be used to set up the related
+	 * rte_crypto_cipher_setup_data structure in the session context, or
+	 * the corresponding parameter in the crypto operation data structures
+	 * op_params parameter MUST be set for a session-less crypto operation.
+	 */
+	RTE_CRYPTO_AUTH_AES_GMAC,
+	/**< AES GMAC algorithm. When this hash algorithm
+	* is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the
+	* rte_crypto_cipher_algorithm enum MUST be used to set up the related
+	* rte_crypto_cipher_setup_data structure in the session context,  or
+	* the corresponding parameter in the crypto operation data structures
+	* op_params parameter MUST be set for a session-less crypto operation.
+	*/
+	RTE_CRYPTO_AUTH_AES_XCBC_MAC,
+	/**< AES XCBC algorithm. */
+
+	RTE_CRYPTO_AUTH_KASUMI_F9,
+	/**< Kasumi algorithm in F9 mode. */
+
+	RTE_CRYPTO_AUTH_MD5,
+	/**< MD5 algorithm */
+	RTE_CRYPTO_AUTH_MD5_HMAC,
+	/**< HMAC using MD5 algorithm */
+
+	RTE_CRYPTO_AUTH_SHA1,
+	/**< 128 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA1_HMAC,
+	/**< HMAC using 128 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA224,
+	/**< 224 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA224_HMAC,
+	/**< HMAC using 224 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA256,
+	/**< 256 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA256_HMAC,
+	/**< HMAC using 256 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA384,
+	/**< 384 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA384_HMAC,
+	/**< HMAC using 384 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA512,
+	/**< 512 bit SHA algorithm. */
+	RTE_CRYPTO_AUTH_SHA512_HMAC,
+	/**< HMAC using 512 bit SHA algorithm. */
+
+	RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+	/**< SNOW3G algorithm in UIA2 mode. */
+
+	RTE_CRYPTO_AUTH_ZUC_EIA3,
+	/**< ZUC algorithm in EIA3 mode */
+
+	RTE_CRYPTO_AUTH_LIST_END
+};
+
+/** Symmetric Authentication / Hash Operations */
+enum rte_crypto_auth_operation {
+	RTE_CRYPTO_AUTH_OP_VERIFY,	/**< Verify authentication digest */
+	RTE_CRYPTO_AUTH_OP_GENERATE	/**< Generate authentication digest */
+};
+
+/**
+ * Authentication / Hash transform data.
+ *
+ * This structure contains data relating to an authentication/hash crypto
+ * transforms. The fields op, algo and digest_length are common to all
+ * authentication transforms and MUST be set.
+ */
+struct rte_crypto_auth_xform {
+	enum rte_crypto_auth_operation op;
+	/**< Authentication operation type */
+	enum rte_crypto_auth_algorithm algo;
+	/**< Authentication algorithm selection */
+
+	struct {
+		uint8_t *data;	/**< pointer to key data */
+		size_t length;	/**< key length in bytes */
+	} key;
+	/**< Authentication key data.
+	 * The authentication key length MUST be less than or equal to the
+	 * block size of the algorithm. It is the callers responsibility to
+	 * ensure that the key length is compliant with the standard being used
+	 * (for example RFC 2104, FIPS 198a).
+	 */
+
+	uint32_t digest_length;
+	/**< Length of the digest to be returned. If the verify option is set,
+	 * this specifies the length of the digest to be compared for the
+	 * session.
+	 *
+	 * If the value is less than the maximum length allowed by the hash,
+	 * the result shall be truncated.  If the value is greater than the
+	 * maximum length allowed by the hash then an error will be generated
+	 * by *rte_cryptodev_sym_session_create* or by the
+	 * *rte_cryptodev_sym_enqueue_burst* if using session-less APIs.
+	 */
+
+	uint32_t add_auth_data_length;
+	/**< The length of the additional authenticated data (AAD) in bytes.
+	 * The maximum permitted value is 240 bytes, unless otherwise specified
+	 * below.
+	 *
+	 * This field must be specified when the hash algorithm is one of the
+	 * following:
+	 *
+	 * - For SNOW3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the
+	 *   length of the IV (which should be 16).
+	 *
+	 * - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM).  In this case, this is
+	 *   the length of the Additional Authenticated Data (called A, in NIST
+	 *   SP800-38D).
+	 *
+	 * - For CCM (@ref RTE_CRYPTO_AUTH_AES_CCM).  In this case, this is
+	 *   the length of the associated data (called A, in NIST SP800-38C).
+	 *   Note that this does NOT include the length of any padding, or the
+	 *   18 bytes reserved at the start of the above field to store the
+	 *   block B0 and the encoded length.  The maximum permitted value in
+	 *   this case is 222 bytes.
+	 *
+	 * @note
+	 *  For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation
+	 *  this field is not used and should be set to 0. Instead the length
+	 *  of the AAD data is specified in the message length to hash field of
+	 *  the rte_crypto_sym_op_data structure.
+	 */
+};
+
+/** Crypto transformation types */
+enum rte_crypto_sym_xform_type {
+	RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED = 0,	/**< No xform specified */
+	RTE_CRYPTO_SYM_XFORM_AUTH,		/**< Authentication xform */
+	RTE_CRYPTO_SYM_XFORM_CIPHER		/**< Cipher xform  */
+};
+
+/**
+ * Symmetric crypto transform structure.
+ *
+ * This is used to specify the crypto transforms required, multiple transforms
+ * can be chained together to specify a chain transforms such as authentication
+ * then cipher, or cipher then authentication. Each transform structure can
+ * hold a single transform, the type field is used to specify which transform
+ * is contained within the union
+ */
+struct rte_crypto_sym_xform {
+	struct rte_crypto_sym_xform *next;
+	/**< next xform in chain */
+	enum rte_crypto_sym_xform_type type
+	; /**< xform type */
+	union {
+		struct rte_crypto_auth_xform auth;
+		/**< Authentication / hash xform */
+		struct rte_crypto_cipher_xform cipher;
+		/**< Cipher xform */
+	};
+};
+
+/**
+ * Crypto operation session type. This is used to specify whether a crypto
+ * operation has session structure attached for immutable parameters or if all
+ * operation information is included in the operation data structure.
+ */
+enum rte_crypto_sym_op_sess_type {
+	RTE_CRYPTO_SYM_OP_WITH_SESSION,	/**< Session based crypto operation */
+	RTE_CRYPTO_SYM_OP_SESSIONLESS	/**< Session-less crypto operation */
+};
+
+
+struct rte_cryptodev_sym_session;
+
+/**
+ * Symmetric Cryptographic Operation.
+ *
+ * This structure contains data relating to performing symmetric cryptographic
+ * processing on a referenced mbuf data buffer.
+ *
+ * When a symmetric crypto operation is enqueued with the device for processing
+ * it must have a valid *rte_mbuf* structure attached, via m_src parameter,
+ * which contains the source data which the crypto operation is to be performed
+ * on.
+ */
+struct rte_crypto_sym_op {
+	struct rte_mbuf *m_src;	/**< source mbuf */
+	struct rte_mbuf *m_dst;	/**< destination mbuf */
+
+	enum rte_crypto_sym_op_sess_type sess_type;
+
+	union {
+		struct rte_cryptodev_sym_session *session;
+		/**< Handle for the initialised session context */
+		struct rte_crypto_sym_xform *xform;
+		/**< Session-less API crypto operation parameters */
+	};
+
+	struct {
+		struct {
+			uint32_t offset;
+			 /**< Starting point for cipher processing, specified
+			  * as number of bytes from start of data in the source
+			  * buffer. The result of the cipher operation will be
+			  * written back into the output buffer starting at
+			  * this location.
+			  *
+			  * @note
+			  * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+			  * this field should be in bits.
+			  */
+
+			uint32_t length;
+			 /**< The message length, in bytes, of the source buffer
+			  * on which the cryptographic operation will be
+			  * computed. This must be a multiple of the block size
+			  * if a block cipher is being used. This is also the
+			  * same as the result length.
+			  *
+			  * @note
+			  * In the case of CCM @ref RTE_CRYPTO_AUTH_AES_CCM,
+			  * this value should not include the length of the
+			  * padding or the length of the MAC; the driver will
+			  * compute the actual number of bytes over which the
+			  * encryption will occur, which will include these
+			  * values.
+			  *
+			  * @note
+			  * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC, this
+			  * field should be set to 0.
+			  *
+			  * @note
+			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2
+			  * this field should be in bits.
+			  */
+		} data; /**< Data offsets and length for ciphering */
+
+		struct {
+			uint8_t *data;
+			/**< Initialisation Vector or Counter.
+			 *
+			 * - For block ciphers in CBC or F8 mode, or for Kasumi
+			 * in F8 mode, or for SNOW3G in UEA2 mode, this is the
+			 * Initialisation Vector (IV) value.
+			 *
+			 * - For block ciphers in CTR mode, this is the counter.
+			 *
+			 * - For GCM mode, this is either the IV (if the length
+			 * is 96 bits) or J0 (for other sizes), where J0 is as
+			 * defined by NIST SP800-38D. Regardless of the IV
+			 * length, a full 16 bytes needs to be allocated.
+			 *
+			 * - For CCM mode, the first byte is reserved, and the
+			 * nonce should be written starting at &iv[1] (to allow
+			 * space for the implementation to write in the flags
+			 * in the first byte). Note that a full 16 bytes should
+			 * be allocated, even though the length field will
+			 * have a value less than this.
+			 *
+			 * - For AES-XTS, this is the 128bit tweak, i, from
+			 * IEEE Std 1619-2007.
+			 *
+			 * For optimum performance, the data pointed to SHOULD
+			 * be 8-byte aligned.
+			 */
+			phys_addr_t phys_addr;
+			uint16_t length;
+			/**< Length of valid IV data.
+			 *
+			 * - For block ciphers in CBC or F8 mode, or for Kasumi
+			 * in F8 mode, or for SNOW3G in UEA2 mode, this is the
+			 * length of the IV (which must be the same as the
+			 * block length of the cipher).
+			 *
+			 * - For block ciphers in CTR mode, this is the length
+			 * of the counter (which must be the same as the block
+			 * length of the cipher).
+			 *
+			 * - For GCM mode, this is either 12 (for 96-bit IVs)
+			 * or 16, in which case data points to J0.
+			 *
+			 * - For CCM mode, this is the length of the nonce,
+			 * which can be in the range 7 to 13 inclusive.
+			 */
+		} iv;	/**< Initialisation vector parameters */
+	} cipher;
+
+	struct {
+		struct {
+			uint32_t offset;
+			 /**< Starting point for hash processing, specified as
+			  * number of bytes from start of packet in source
+			  * buffer.
+			  *
+			  * @note
+			  * For CCM and GCM modes of operation, this field is
+			  * ignored. The field @ref aad field
+			  * should be set instead.
+			  *
+			  * @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC)
+			  * mode of operation, this field specifies the start
+			  * of the AAD data in the source buffer.
+			  *
+			  * @note
+			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
+			  * this field should be in bits.
+			  */
+
+			uint32_t length;
+			 /**< The message length, in bytes, of the source
+			  * buffer that the hash will be computed on.
+			  *
+			  * @note
+			  * For CCM and GCM modes of operation, this field is
+			  * ignored. The field @ref aad field should be set
+			  * instead.
+			  *
+			  * @note
+			  * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode
+			  * of operation, this field specifies the length of
+			  * the AAD data in the source buffer.
+			  *
+			  * @note
+			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
+			  * this field should be in bits.
+			  */
+		} data; /**< Data offsets and length for authentication */
+
+		struct {
+			uint8_t *data;
+			/**< If this member of this structure is set this is a
+			 * pointer to the location where the digest result
+			 * should be inserted (in the case of digest generation)
+			 * or where the purported digest exists (in the case of
+			 * digest verification).
+			 *
+			 * At session creation time, the client specified the
+			 * digest result length with the digest_length member
+			 * of the @ref rte_crypto_auth_xform structure. For
+			 * physical crypto devices the caller must allocate at
+			 * least digest_length of physically contiguous memory
+			 * at this location.
+			 *
+			 * For digest generation, the digest result will
+			 * overwrite any data at this location.
+			 *
+			 * @note
+			 * For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), for
+			 * "digest result" read "authentication tag T".
+			 *
+			 * If this member is not set the digest result is
+			 * understood to be in the destination buffer for
+			 * digest generation, and in the source buffer for
+			 * digest verification. The location of the digest
+			 * result in this case is immediately following the
+			 * region over which the digest is computed.
+			 */
+			phys_addr_t phys_addr;
+			/**< Physical address of digest */
+			uint16_t length;
+			/**< Length of digest */
+		} digest; /**< Digest parameters */
+
+		struct {
+			uint8_t *data;
+			/**< Pointer to Additional Authenticated Data (AAD)
+			 * needed for authenticated cipher mechanisms (CCM and
+			 * GCM), and to the IV for SNOW3G authentication
+			 * (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other
+			 * authentication mechanisms this pointer is ignored.
+			 *
+			 * The length of the data pointed to by this field is
+			 * set up for the session in the @ref
+			 * rte_crypto_auth_xform structure as part of the @ref
+			 * rte_cryptodev_sym_session_create function call.
+			 * This length must not exceed 240 bytes.
+			 *
+			 * Specifically for CCM (@ref RTE_CRYPTO_AUTH_AES_CCM),
+			 * the caller should setup this field as follows:
+			 *
+			 * - the nonce should be written starting at an offset
+			 * of one byte into the array, leaving room for the
+			 * implementation to write in the flags to the first
+			 *  byte.
+			 *
+			 * - the additional  authentication data itself should
+			 * be written starting at an offset of 18 bytes into
+			 * the array, leaving room for the length encoding in
+			 * the first two bytes of the second block.
+			 *
+			 * - the array should be big enough to hold the above
+			 *  fields, plus any padding to round this up to the
+			 *  nearest multiple of the block size (16 bytes).
+			 *  Padding will be added by the implementation.
+			 *
+			 * Finally, for GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), the
+			 * caller should setup this field as follows:
+			 *
+			 * - the AAD is written in starting at byte 0
+			 * - the array must be big enough to hold the AAD, plus
+			 * any space to round this up to the nearest multiple
+			 * of the block size (16 bytes).
+			 *
+			 * @note
+			 * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of
+			 * operation, this field is not used and should be set
+			 * to 0. Instead the AAD data should be placed in the
+			 * source buffer.
+			 */
+			phys_addr_t phys_addr;	/**< physical address */
+			uint16_t length;	/**< Length of digest */
+		} aad;
+		/**< Additional authentication parameters */
+	} auth;
+} __rte_cache_aligned;
+
+
+/**
+ * Reset the fields of a symmetric operation to their default values.
+ *
+ * @param	op	The crypto operation to be reset.
+ */
+static inline void
+__rte_crypto_sym_op_reset(struct rte_crypto_sym_op *op)
+{
+	memset(op, 0, sizeof(*op));
+
+	op->sess_type = RTE_CRYPTO_SYM_OP_SESSIONLESS;
+}
+
+
+/**
+ * Allocate space for symmetric crypto xforms in the private data space of the
+ * crypto operation. This also defaults the crypto xform type to
+ * RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED and configures the chaining of the xforms
+ * in the crypto operation
+ *
+ * @return
+ * - On success returns pointer to first crypto xform in crypto operations chain
+ * - On failure returns NULL
+ */
+static inline struct rte_crypto_sym_xform *
+__rte_crypto_sym_op_sym_xforms_alloc(struct rte_crypto_sym_op *sym_op,
+		void *priv_data, uint8_t nb_xforms)
+{
+	struct rte_crypto_sym_xform *xform;
+
+	sym_op->xform = xform = (struct rte_crypto_sym_xform *)priv_data;
+
+	do {
+		xform->type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED;
+		xform = xform->next = --nb_xforms > 0 ? xform + 1 : NULL;
+	} while (xform);
+
+	return sym_op->xform;
+}
+
+
+/**
+ * Attach a session to a symmetric crypto operation
+ *
+ * @param	sym_op	crypto operation
+ * @param	sess	cryptodev session
+ */
+static inline int
+__rte_crypto_sym_op_attach_sym_session(struct rte_crypto_sym_op *sym_op,
+		struct rte_cryptodev_sym_session *sess)
+{
+	sym_op->session = sess;
+	sym_op->sess_type = RTE_CRYPTO_SYM_OP_WITH_SESSION;
+
+	return 0;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTO_SYM_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
new file mode 100644
index 00000000..aa4ea425
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -0,0 +1,1162 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_dev.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+
+#include "rte_crypto.h"
+#include "rte_cryptodev.h"
+#include "rte_cryptodev_pmd.h"
+
+struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS];
+
+struct rte_cryptodev *rte_cryptodevs = &rte_crypto_devices[0];
+
+static struct rte_cryptodev_global cryptodev_globals = {
+		.devs			= &rte_crypto_devices[0],
+		.data			= { NULL },
+		.nb_devs		= 0,
+		.max_devs		= RTE_CRYPTO_MAX_DEVS
+};
+
+struct rte_cryptodev_global *rte_cryptodev_globals = &cryptodev_globals;
+
+/* spinlock for crypto device callbacks */
+static rte_spinlock_t rte_cryptodev_cb_lock = RTE_SPINLOCK_INITIALIZER;
+
+
+/**
+ * The user application callback description.
+ *
+ * It contains callback address to be registered by user application,
+ * the pointer to the parameters for callback, and the event type.
+ */
+struct rte_cryptodev_callback {
+	TAILQ_ENTRY(rte_cryptodev_callback) next; /**< Callbacks list */
+	rte_cryptodev_cb_fn cb_fn;		/**< Callback address */
+	void *cb_arg;				/**< Parameter for callback */
+	enum rte_cryptodev_event_type event;	/**< Interrupt event type */
+	uint32_t active;			/**< Callback is executing */
+};
+
+
+const char *
+rte_cryptodev_get_feature_name(uint64_t flag)
+{
+	switch (flag) {
+	case RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO:
+		return "SYMMETRIC_CRYPTO";
+	case RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO:
+		return "ASYMMETRIC_CRYPTO";
+	case RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING:
+		return "SYM_OPERATION_CHAINING";
+	case RTE_CRYPTODEV_FF_CPU_SSE:
+		return "CPU_SSE";
+	case RTE_CRYPTODEV_FF_CPU_AVX:
+		return "CPU_AVX";
+	case RTE_CRYPTODEV_FF_CPU_AVX2:
+		return "CPU_AVX2";
+	case RTE_CRYPTODEV_FF_CPU_AESNI:
+		return "CPU_AESNI";
+	case RTE_CRYPTODEV_FF_HW_ACCELERATED:
+		return "HW_ACCELERATED";
+
+	default:
+		return NULL;
+	}
+}
+
+
+int
+rte_cryptodev_create_vdev(const char *name, const char *args)
+{
+	return rte_eal_vdev_init(name, args);
+}
+
+int
+rte_cryptodev_get_dev_id(const char *name) {
+	unsigned i;
+
+	if (name == NULL)
+		return -1;
+
+	for (i = 0; i < rte_cryptodev_globals->max_devs; i++)
+		if ((strcmp(rte_cryptodev_globals->devs[i].data->name, name)
+				== 0) &&
+				(rte_cryptodev_globals->devs[i].attached ==
+						RTE_CRYPTODEV_ATTACHED))
+			return i;
+
+	return -1;
+}
+
+uint8_t
+rte_cryptodev_count(void)
+{
+	return rte_cryptodev_globals->nb_devs;
+}
+
+uint8_t
+rte_cryptodev_count_devtype(enum rte_cryptodev_type type)
+{
+	uint8_t i, dev_count = 0;
+
+	for (i = 0; i < rte_cryptodev_globals->max_devs; i++)
+		if (rte_cryptodev_globals->devs[i].dev_type == type &&
+			rte_cryptodev_globals->devs[i].attached ==
+					RTE_CRYPTODEV_ATTACHED)
+			dev_count++;
+
+	return dev_count;
+}
+
+int
+rte_cryptodev_socket_id(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id))
+		return -1;
+
+	dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+	return dev->data->socket_id;
+}
+
+static inline int
+rte_cryptodev_data_alloc(uint8_t dev_id, struct rte_cryptodev_data **data,
+		int socket_id)
+{
+	char mz_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+	const struct rte_memzone *mz;
+	int n;
+
+	/* generate memzone name */
+	n = snprintf(mz_name, sizeof(mz_name), "rte_cryptodev_data_%u", dev_id);
+	if (n >= (int)sizeof(mz_name))
+		return -EINVAL;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		mz = rte_memzone_reserve(mz_name,
+				sizeof(struct rte_cryptodev_data),
+				socket_id, 0);
+	} else
+		mz = rte_memzone_lookup(mz_name);
+
+	if (mz == NULL)
+		return -ENOMEM;
+
+	*data = mz->addr;
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		memset(*data, 0, sizeof(struct rte_cryptodev_data));
+
+	return 0;
+}
+
+static uint8_t
+rte_cryptodev_find_free_device_index(void)
+{
+	uint8_t dev_id;
+
+	for (dev_id = 0; dev_id < RTE_CRYPTO_MAX_DEVS; dev_id++) {
+		if (rte_crypto_devices[dev_id].attached ==
+				RTE_CRYPTODEV_DETACHED)
+			return dev_id;
+	}
+	return RTE_CRYPTO_MAX_DEVS;
+}
+
+struct rte_cryptodev *
+rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
+{
+	struct rte_cryptodev *cryptodev;
+	uint8_t dev_id;
+
+	if (rte_cryptodev_pmd_get_named_dev(name) != NULL) {
+		CDEV_LOG_ERR("Crypto device with name %s already "
+				"allocated!", name);
+		return NULL;
+	}
+
+	dev_id = rte_cryptodev_find_free_device_index();
+	if (dev_id == RTE_CRYPTO_MAX_DEVS) {
+		CDEV_LOG_ERR("Reached maximum number of crypto devices");
+		return NULL;
+	}
+
+	cryptodev = rte_cryptodev_pmd_get_dev(dev_id);
+
+	if (cryptodev->data == NULL) {
+		struct rte_cryptodev_data *cryptodev_data =
+				cryptodev_globals.data[dev_id];
+
+		int retval = rte_cryptodev_data_alloc(dev_id, &cryptodev_data,
+				socket_id);
+
+		if (retval < 0 || cryptodev_data == NULL)
+			return NULL;
+
+		cryptodev->data = cryptodev_data;
+
+		snprintf(cryptodev->data->name, RTE_CRYPTODEV_NAME_MAX_LEN,
+				"%s", name);
+
+		cryptodev->data->dev_id = dev_id;
+		cryptodev->data->socket_id = socket_id;
+		cryptodev->data->dev_started = 0;
+
+		cryptodev->attached = RTE_CRYPTODEV_ATTACHED;
+		cryptodev->pmd_type = type;
+
+		cryptodev_globals.nb_devs++;
+	}
+
+	return cryptodev;
+}
+
+static inline int
+rte_cryptodev_create_unique_device_name(char *name, size_t size,
+		struct rte_pci_device *pci_dev)
+{
+	int ret;
+
+	if ((name == NULL) || (pci_dev == NULL))
+		return -EINVAL;
+
+	ret = snprintf(name, size, "%d:%d.%d",
+			pci_dev->addr.bus, pci_dev->addr.devid,
+			pci_dev->addr.function);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int
+rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev)
+{
+	int ret;
+
+	if (cryptodev == NULL)
+		return -EINVAL;
+
+	ret = rte_cryptodev_close(cryptodev->data->dev_id);
+	if (ret < 0)
+		return ret;
+
+	cryptodev->attached = RTE_CRYPTODEV_DETACHED;
+	cryptodev_globals.nb_devs--;
+	return 0;
+}
+
+struct rte_cryptodev *
+rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
+		int socket_id)
+{
+	struct rte_cryptodev *cryptodev;
+
+	/* allocate device structure */
+	cryptodev = rte_cryptodev_pmd_allocate(name, PMD_VDEV, socket_id);
+	if (cryptodev == NULL)
+		return NULL;
+
+	/* allocate private device structure */
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		cryptodev->data->dev_private =
+				rte_zmalloc_socket("cryptodev device private",
+						dev_private_size,
+						RTE_CACHE_LINE_SIZE,
+						socket_id);
+
+		if (cryptodev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private device"
+					" data");
+	}
+
+	/* initialise user call-back tail queue */
+	TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
+	return cryptodev;
+}
+
+static int
+rte_cryptodev_init(struct rte_pci_driver *pci_drv,
+		struct rte_pci_device *pci_dev)
+{
+	struct rte_cryptodev_driver *cryptodrv;
+	struct rte_cryptodev *cryptodev;
+
+	char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+
+	int retval;
+
+	cryptodrv = (struct rte_cryptodev_driver *)pci_drv;
+	if (cryptodrv == NULL)
+		return -ENODEV;
+
+	/* Create unique Crypto device name using PCI address */
+	rte_cryptodev_create_unique_device_name(cryptodev_name,
+			sizeof(cryptodev_name), pci_dev);
+
+	cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, PMD_PDEV,
+			rte_socket_id());
+	if (cryptodev == NULL)
+		return -ENOMEM;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		cryptodev->data->dev_private =
+				rte_zmalloc_socket(
+						"cryptodev private structure",
+						cryptodrv->dev_private_size,
+						RTE_CACHE_LINE_SIZE,
+						rte_socket_id());
+
+		if (cryptodev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private "
+					"device data");
+	}
+
+	cryptodev->pci_dev = pci_dev;
+	cryptodev->driver = cryptodrv;
+
+	/* init user callbacks */
+	TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
+	/* Invoke PMD device initialization function */
+	retval = (*cryptodrv->cryptodev_init)(cryptodrv, cryptodev);
+	if (retval == 0)
+		return 0;
+
+	CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)"
+			" failed", pci_drv->name,
+			(unsigned) pci_dev->id.vendor_id,
+			(unsigned) pci_dev->id.device_id);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(cryptodev->data->dev_private);
+
+	cryptodev->attached = RTE_CRYPTODEV_DETACHED;
+	cryptodev_globals.nb_devs--;
+
+	return -ENXIO;
+}
+
+static int
+rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
+{
+	const struct rte_cryptodev_driver *cryptodrv;
+	struct rte_cryptodev *cryptodev;
+	char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+	int ret;
+
+	if (pci_dev == NULL)
+		return -EINVAL;
+
+	/* Create unique device name using PCI address */
+	rte_cryptodev_create_unique_device_name(cryptodev_name,
+			sizeof(cryptodev_name), pci_dev);
+
+	cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
+	if (cryptodev == NULL)
+		return -ENODEV;
+
+	cryptodrv = (const struct rte_cryptodev_driver *)pci_dev->driver;
+	if (cryptodrv == NULL)
+		return -ENODEV;
+
+	/* Invoke PMD device uninit function */
+	if (*cryptodrv->cryptodev_uninit) {
+		ret = (*cryptodrv->cryptodev_uninit)(cryptodrv, cryptodev);
+		if (ret)
+			return ret;
+	}
+
+	/* free crypto device */
+	rte_cryptodev_pmd_release_device(cryptodev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(cryptodev->data->dev_private);
+
+	cryptodev->pci_dev = NULL;
+	cryptodev->driver = NULL;
+	cryptodev->data = NULL;
+
+	return 0;
+}
+
+int
+rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *cryptodrv,
+		enum pmd_type type)
+{
+	/* Call crypto device initialization directly if device is virtual */
+	if (type == PMD_VDEV)
+		return rte_cryptodev_init((struct rte_pci_driver *)cryptodrv,
+				NULL);
+
+	/*
+	 * Register PCI driver for physical device intialisation during
+	 * PCI probing
+	 */
+	cryptodrv->pci_drv.devinit = rte_cryptodev_init;
+	cryptodrv->pci_drv.devuninit = rte_cryptodev_uninit;
+
+	rte_eal_pci_register(&cryptodrv->pci_drv);
+
+	return 0;
+}
+
+
+uint16_t
+rte_cryptodev_queue_pair_count(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev;
+
+	dev = &rte_crypto_devices[dev_id];
+	return dev->data->nb_queue_pairs;
+}
+
+static int
+rte_cryptodev_queue_pairs_config(struct rte_cryptodev *dev, uint16_t nb_qpairs,
+		int socket_id)
+{
+	struct rte_cryptodev_info dev_info;
+	void **qp;
+	unsigned i;
+
+	if ((dev == NULL) || (nb_qpairs < 1)) {
+		CDEV_LOG_ERR("invalid param: dev %p, nb_queues %u",
+							dev, nb_qpairs);
+		return -EINVAL;
+	}
+
+	CDEV_LOG_DEBUG("Setup %d queues pairs on device %u",
+			nb_qpairs, dev->data->dev_id);
+
+	memset(&dev_info, 0, sizeof(struct rte_cryptodev_info));
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	(*dev->dev_ops->dev_infos_get)(dev, &dev_info);
+
+	if (nb_qpairs > (dev_info.max_nb_queue_pairs)) {
+		CDEV_LOG_ERR("Invalid num queue_pairs (%u) for dev %u",
+				nb_qpairs, dev->data->dev_id);
+	    return -EINVAL;
+	}
+
+	if (dev->data->queue_pairs == NULL) { /* first time configuration */
+		dev->data->queue_pairs = rte_zmalloc_socket(
+				"cryptodev->queue_pairs",
+				sizeof(dev->data->queue_pairs[0]) * nb_qpairs,
+				RTE_CACHE_LINE_SIZE, socket_id);
+
+		if (dev->data->queue_pairs == NULL) {
+			dev->data->nb_queue_pairs = 0;
+			CDEV_LOG_ERR("failed to get memory for qp meta data, "
+							"nb_queues %u",
+							nb_qpairs);
+			return -(ENOMEM);
+		}
+	} else { /* re-configure */
+		int ret;
+		uint16_t old_nb_queues = dev->data->nb_queue_pairs;
+
+		qp = dev->data->queue_pairs;
+
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_release,
+				-ENOTSUP);
+
+		for (i = nb_qpairs; i < old_nb_queues; i++) {
+			ret = (*dev->dev_ops->queue_pair_release)(dev, i);
+			if (ret < 0)
+				return ret;
+		}
+
+		qp = rte_realloc(qp, sizeof(qp[0]) * nb_qpairs,
+				RTE_CACHE_LINE_SIZE);
+		if (qp == NULL) {
+			CDEV_LOG_ERR("failed to realloc qp meta data,"
+						" nb_queues %u", nb_qpairs);
+			return -(ENOMEM);
+		}
+
+		if (nb_qpairs > old_nb_queues) {
+			uint16_t new_qs = nb_qpairs - old_nb_queues;
+
+			memset(qp + old_nb_queues, 0,
+				sizeof(qp[0]) * new_qs);
+		}
+
+		dev->data->queue_pairs = qp;
+
+	}
+	dev->data->nb_queue_pairs = nb_qpairs;
+	return 0;
+}
+
+int
+rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+	if (queue_pair_id >= dev->data->nb_queue_pairs) {
+		CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_start, -ENOTSUP);
+
+	return dev->dev_ops->queue_pair_start(dev, queue_pair_id);
+
+}
+
+int
+rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+	if (queue_pair_id >= dev->data->nb_queue_pairs) {
+		CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_stop, -ENOTSUP);
+
+	return dev->dev_ops->queue_pair_stop(dev, queue_pair_id);
+
+}
+
+static int
+rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev,
+		unsigned nb_objs, unsigned obj_cache_size, int socket_id);
+
+int
+rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config)
+{
+	struct rte_cryptodev *dev;
+	int diag;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	if (dev->data->dev_started) {
+		CDEV_LOG_ERR(
+		    "device %d must be stopped to allow configuration", dev_id);
+		return -EBUSY;
+	}
+
+	/* Setup new number of queue pairs and reconfigure device. */
+	diag = rte_cryptodev_queue_pairs_config(dev, config->nb_queue_pairs,
+			config->socket_id);
+	if (diag != 0) {
+		CDEV_LOG_ERR("dev%d rte_crypto_dev_queue_pairs_config = %d",
+				dev_id, diag);
+		return diag;
+	}
+
+	/* Setup Session mempool for device */
+	return rte_cryptodev_sym_session_pool_create(dev,
+			config->session_mp.nb_objs,
+			config->session_mp.cache_size,
+			config->socket_id);
+}
+
+
+int
+rte_cryptodev_start(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev;
+	int diag;
+
+	CDEV_LOG_DEBUG("Start dev_id=%" PRIu8, dev_id);
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
+
+	if (dev->data->dev_started != 0) {
+		CDEV_LOG_ERR("Device with dev_id=%" PRIu8 " already started",
+			dev_id);
+		return 0;
+	}
+
+	diag = (*dev->dev_ops->dev_start)(dev);
+	if (diag == 0)
+		dev->data->dev_started = 1;
+	else
+		return diag;
+
+	return 0;
+}
+
+void
+rte_cryptodev_stop(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
+
+	if (dev->data->dev_started == 0) {
+		CDEV_LOG_ERR("Device with dev_id=%" PRIu8 " already stopped",
+			dev_id);
+		return;
+	}
+
+	dev->data->dev_started = 0;
+	(*dev->dev_ops->dev_stop)(dev);
+}
+
+int
+rte_cryptodev_close(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev;
+	int retval;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -1;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	/* Device must be stopped before it can be closed */
+	if (dev->data->dev_started == 1) {
+		CDEV_LOG_ERR("Device %u must be stopped before closing",
+				dev_id);
+		return -EBUSY;
+	}
+
+	/* We can't close the device if there are outstanding sessions in use */
+	if (dev->data->session_pool != NULL) {
+		if (!rte_mempool_full(dev->data->session_pool)) {
+			CDEV_LOG_ERR("dev_id=%u close failed, session mempool "
+					"has sessions still in use, free "
+					"all sessions before calling close",
+					(unsigned)dev_id);
+			return -EBUSY;
+		}
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_close, -ENOTSUP);
+	retval = (*dev->dev_ops->dev_close)(dev);
+
+	if (retval < 0)
+		return retval;
+
+	return 0;
+}
+
+int
+rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
+		const struct rte_cryptodev_qp_conf *qp_conf, int socket_id)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+	if (queue_pair_id >= dev->data->nb_queue_pairs) {
+		CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->dev_started) {
+		CDEV_LOG_ERR(
+		    "device %d must be stopped to allow configuration", dev_id);
+		return -EBUSY;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_setup, -ENOTSUP);
+
+	return (*dev->dev_ops->queue_pair_setup)(dev, queue_pair_id, qp_conf,
+			socket_id);
+}
+
+
+int
+rte_cryptodev_stats_get(uint8_t dev_id, struct rte_cryptodev_stats *stats)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
+		return -ENODEV;
+	}
+
+	if (stats == NULL) {
+		CDEV_LOG_ERR("Invalid stats ptr");
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+	memset(stats, 0, sizeof(*stats));
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP);
+	(*dev->dev_ops->stats_get)(dev, stats);
+	return 0;
+}
+
+void
+rte_cryptodev_stats_reset(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset);
+	(*dev->dev_ops->stats_reset)(dev);
+}
+
+
+void
+rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
+{
+	struct rte_cryptodev *dev;
+
+	if (dev_id >= cryptodev_globals.nb_devs) {
+		CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
+		return;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	memset(dev_info, 0, sizeof(struct rte_cryptodev_info));
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
+	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
+
+	dev_info->pci_dev = dev->pci_dev;
+	if (dev->driver)
+		dev_info->driver_name = dev->driver->pci_drv.name;
+}
+
+
+int
+rte_cryptodev_callback_register(uint8_t dev_id,
+			enum rte_cryptodev_event_type event,
+			rte_cryptodev_cb_fn cb_fn, void *cb_arg)
+{
+	struct rte_cryptodev *dev;
+	struct rte_cryptodev_callback *user_cb;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+	rte_spinlock_lock(&rte_cryptodev_cb_lock);
+
+	TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) {
+		if (user_cb->cb_fn == cb_fn &&
+			user_cb->cb_arg == cb_arg &&
+			user_cb->event == event) {
+			break;
+		}
+	}
+
+	/* create a new callback. */
+	if (user_cb == NULL) {
+		user_cb = rte_zmalloc("INTR_USER_CALLBACK",
+				sizeof(struct rte_cryptodev_callback), 0);
+		if (user_cb != NULL) {
+			user_cb->cb_fn = cb_fn;
+			user_cb->cb_arg = cb_arg;
+			user_cb->event = event;
+			TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
+		}
+	}
+
+	rte_spinlock_unlock(&rte_cryptodev_cb_lock);
+	return (user_cb == NULL) ? -ENOMEM : 0;
+}
+
+int
+rte_cryptodev_callback_unregister(uint8_t dev_id,
+			enum rte_cryptodev_event_type event,
+			rte_cryptodev_cb_fn cb_fn, void *cb_arg)
+{
+	int ret;
+	struct rte_cryptodev *dev;
+	struct rte_cryptodev_callback *cb, *next;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+	rte_spinlock_lock(&rte_cryptodev_cb_lock);
+
+	ret = 0;
+	for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) {
+
+		next = TAILQ_NEXT(cb, next);
+
+		if (cb->cb_fn != cb_fn || cb->event != event ||
+				(cb->cb_arg != (void *)-1 &&
+				cb->cb_arg != cb_arg))
+			continue;
+
+		/*
+		 * if this callback is not executing right now,
+		 * then remove it.
+		 */
+		if (cb->active == 0) {
+			TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next);
+			rte_free(cb);
+		} else {
+			ret = -EAGAIN;
+		}
+	}
+
+	rte_spinlock_unlock(&rte_cryptodev_cb_lock);
+	return ret;
+}
+
+void
+rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
+	enum rte_cryptodev_event_type event)
+{
+	struct rte_cryptodev_callback *cb_lst;
+	struct rte_cryptodev_callback dev_cb;
+
+	rte_spinlock_lock(&rte_cryptodev_cb_lock);
+	TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
+		if (cb_lst->cb_fn == NULL || cb_lst->event != event)
+			continue;
+		dev_cb = *cb_lst;
+		cb_lst->active = 1;
+		rte_spinlock_unlock(&rte_cryptodev_cb_lock);
+		dev_cb.cb_fn(dev->data->dev_id, dev_cb.event,
+						dev_cb.cb_arg);
+		rte_spinlock_lock(&rte_cryptodev_cb_lock);
+		cb_lst->active = 0;
+	}
+	rte_spinlock_unlock(&rte_cryptodev_cb_lock);
+}
+
+
+static void
+rte_cryptodev_sym_session_init(struct rte_mempool *mp,
+		void *opaque_arg,
+		void *_sess,
+		__rte_unused unsigned i)
+{
+	struct rte_cryptodev_sym_session *sess = _sess;
+	struct rte_cryptodev *dev = opaque_arg;
+
+	memset(sess, 0, mp->elt_size);
+
+	sess->dev_id = dev->data->dev_id;
+	sess->dev_type = dev->dev_type;
+	sess->mp = mp;
+
+	if (dev->dev_ops->session_initialize)
+		(*dev->dev_ops->session_initialize)(mp, sess->_private);
+}
+
+static int
+rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev,
+		unsigned nb_objs, unsigned obj_cache_size, int socket_id)
+{
+	char mp_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+	unsigned priv_sess_size;
+
+	unsigned n = snprintf(mp_name, sizeof(mp_name), "cdev_%d_sess_mp",
+			dev->data->dev_id);
+	if (n > sizeof(mp_name)) {
+		CDEV_LOG_ERR("Unable to create unique name for session mempool");
+		return -ENOMEM;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_get_size, -ENOTSUP);
+	priv_sess_size = (*dev->dev_ops->session_get_size)(dev);
+	if (priv_sess_size == 0) {
+		CDEV_LOG_ERR("%s returned and invalid private session size ",
+						dev->data->name);
+		return -ENOMEM;
+	}
+
+	unsigned elt_size = sizeof(struct rte_cryptodev_sym_session) +
+			priv_sess_size;
+
+	dev->data->session_pool = rte_mempool_lookup(mp_name);
+	if (dev->data->session_pool != NULL) {
+		if ((dev->data->session_pool->elt_size != elt_size) ||
+				(dev->data->session_pool->cache_size <
+				obj_cache_size) ||
+				(dev->data->session_pool->size < nb_objs)) {
+
+			CDEV_LOG_ERR("%s mempool already exists with different"
+					" initialization parameters", mp_name);
+			dev->data->session_pool = NULL;
+			return -ENOMEM;
+		}
+	} else {
+		dev->data->session_pool = rte_mempool_create(
+				mp_name, /* mempool name */
+				nb_objs, /* number of elements*/
+				elt_size, /* element size*/
+				obj_cache_size, /* Cache size*/
+				0, /* private data size */
+				NULL, /* obj initialization constructor */
+				NULL, /* obj initialization constructor arg */
+				rte_cryptodev_sym_session_init,
+				/**< obj constructor*/
+				dev, /* obj constructor arg */
+				socket_id, /* socket id */
+				0); /* flags */
+
+		if (dev->data->session_pool == NULL) {
+			CDEV_LOG_ERR("%s mempool allocation failed", mp_name);
+			return -ENOMEM;
+		}
+	}
+
+	CDEV_LOG_DEBUG("%s mempool created!", mp_name);
+	return 0;
+}
+
+struct rte_cryptodev_sym_session *
+rte_cryptodev_sym_session_create(uint8_t dev_id,
+		struct rte_crypto_sym_xform *xform)
+{
+	struct rte_cryptodev *dev;
+	struct rte_cryptodev_sym_session *sess;
+	void *_sess;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
+		return NULL;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	/* Allocate a session structure from the session pool */
+	if (rte_mempool_get(dev->data->session_pool, &_sess)) {
+		CDEV_LOG_ERR("Couldn't get object from session mempool");
+		return NULL;
+	}
+
+	sess = (struct rte_cryptodev_sym_session *)_sess;
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_configure, NULL);
+	if (dev->dev_ops->session_configure(dev, xform, sess->_private) ==
+			NULL) {
+		CDEV_LOG_ERR("dev_id %d failed to configure session details",
+				dev_id);
+
+		/* Return session to mempool */
+		rte_mempool_put(sess->mp, _sess);
+		return NULL;
+	}
+
+	return sess;
+}
+
+struct rte_cryptodev_sym_session *
+rte_cryptodev_sym_session_free(uint8_t dev_id,
+		struct rte_cryptodev_sym_session *sess)
+{
+	struct rte_cryptodev *dev;
+
+	if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+		CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
+		return sess;
+	}
+
+	dev = &rte_crypto_devices[dev_id];
+
+	/* Check the session belongs to this device type */
+	if (sess->dev_type != dev->dev_type)
+		return sess;
+
+	/* Let device implementation clear session material */
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_clear, sess);
+	dev->dev_ops->session_clear(dev, (void *)sess->_private);
+
+	/* Return session to mempool */
+	rte_mempool_put(sess->mp, (void *)sess);
+
+	return NULL;
+}
+
+/** Initialise rte_crypto_op mempool element */
+static void
+rte_crypto_op_init(struct rte_mempool *mempool,
+		void *opaque_arg,
+		void *_op_data,
+		__rte_unused unsigned i)
+{
+	struct rte_crypto_op *op = _op_data;
+	enum rte_crypto_op_type type = *(enum rte_crypto_op_type *)opaque_arg;
+
+	memset(_op_data, 0, mempool->elt_size);
+
+	__rte_crypto_op_reset(op, type);
+
+	op->phys_addr = rte_mem_virt2phy(_op_data);
+	op->mempool = mempool;
+}
+
+
+struct rte_mempool *
+rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type,
+		unsigned nb_elts, unsigned cache_size, uint16_t priv_size,
+		int socket_id)
+{
+	struct rte_crypto_op_pool_private *priv;
+
+	unsigned elt_size = sizeof(struct rte_crypto_op) +
+			sizeof(struct rte_crypto_sym_op) +
+			priv_size;
+
+	/* lookup mempool in case already allocated */
+	struct rte_mempool *mp = rte_mempool_lookup(name);
+
+	if (mp != NULL) {
+		priv = (struct rte_crypto_op_pool_private *)
+				rte_mempool_get_priv(mp);
+
+		if (mp->elt_size != elt_size ||
+				mp->cache_size < cache_size ||
+				mp->size < nb_elts ||
+				priv->priv_size <  priv_size) {
+			mp = NULL;
+			CDEV_LOG_ERR("Mempool %s already exists but with "
+					"incompatible parameters", name);
+			return NULL;
+		}
+		return mp;
+	}
+
+	mp = rte_mempool_create(
+			name,
+			nb_elts,
+			elt_size,
+			cache_size,
+			sizeof(struct rte_crypto_op_pool_private),
+			NULL,
+			NULL,
+			rte_crypto_op_init,
+			&type,
+			socket_id,
+			0);
+
+	if (mp == NULL) {
+		CDEV_LOG_ERR("Failed to create mempool %s", name);
+		return NULL;
+	}
+
+	priv = (struct rte_crypto_op_pool_private *)
+			rte_mempool_get_priv(mp);
+
+	priv->priv_size = priv_size;
+	priv->type = type;
+
+	return mp;
+}
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
new file mode 100644
index 00000000..d47f1e87
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -0,0 +1,896 @@
+/*-
+ *
+ *   Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRYPTODEV_H_
+#define _RTE_CRYPTODEV_H_
+
+/**
+ * @file rte_cryptodev.h
+ *
+ * RTE Cryptographic Device APIs
+ *
+ * Defines RTE Crypto Device APIs for the provisioning of cipher and
+ * authentication operations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "rte_kvargs.h"
+#include "rte_crypto.h"
+#include "rte_dev.h"
+
+#define CRYPTODEV_NAME_NULL_PMD		("cryptodev_null_pmd")
+/**< Null crypto PMD device name */
+#define CRYPTODEV_NAME_AESNI_MB_PMD	("cryptodev_aesni_mb_pmd")
+/**< AES-NI Multi buffer PMD device name */
+#define CRYPTODEV_NAME_AESNI_GCM_PMD	("cryptodev_aesni_gcm_pmd")
+/**< AES-NI GCM PMD device name */
+#define CRYPTODEV_NAME_QAT_SYM_PMD	("cryptodev_qat_sym_pmd")
+/**< Intel QAT Symmetric Crypto PMD device name */
+#define CRYPTODEV_NAME_SNOW3G_PMD	("cryptodev_snow3g_pmd")
+/**< SNOW 3G PMD device name */
+
+/** Crypto device type */
+enum rte_cryptodev_type {
+	RTE_CRYPTODEV_NULL_PMD = 1,	/**< Null crypto PMD */
+	RTE_CRYPTODEV_AESNI_GCM_PMD,	/**< AES-NI GCM PMD */
+	RTE_CRYPTODEV_AESNI_MB_PMD,	/**< AES-NI multi buffer PMD */
+	RTE_CRYPTODEV_QAT_SYM_PMD,	/**< QAT PMD Symmetric Crypto */
+	RTE_CRYPTODEV_SNOW3G_PMD,	/**< SNOW 3G PMD */
+};
+
+extern const char **rte_cyptodev_names;
+
+/* Logging Macros */
+
+#define CDEV_LOG_ERR(fmt, args...)					\
+		RTE_LOG(ERR, CRYPTODEV, "%s() line %u: " fmt "\n",	\
+				__func__, __LINE__, ## args)
+
+#define CDEV_PMD_LOG_ERR(dev, fmt, args...)				\
+		RTE_LOG(ERR, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \
+				dev, __func__, __LINE__, ## args)
+
+#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
+#define CDEV_LOG_DEBUG(fmt, args...)					\
+		RTE_LOG(DEBUG, CRYPTODEV, "%s() line %u: " fmt "\n",	\
+				__func__, __LINE__, ## args)		\
+
+#define CDEV_PMD_TRACE(fmt, args...)					\
+		RTE_LOG(DEBUG, CRYPTODEV, "[%s] %s: " fmt "\n",		\
+				dev, __func__, ## args)
+
+#else
+#define CDEV_LOG_DEBUG(fmt, args...)
+#define CDEV_PMD_TRACE(fmt, args...)
+#endif
+
+/**
+ * Symmetric Crypto Capability
+ */
+struct rte_cryptodev_symmetric_capability {
+	enum rte_crypto_sym_xform_type xform_type;
+	/**< Transform type : Authentication / Cipher */
+	union {
+		struct {
+			enum rte_crypto_auth_algorithm algo;
+			/**< authentication algorithm */
+			uint16_t block_size;
+			/**< algorithm block size */
+			struct {
+				uint16_t min;	/**< minimum key size */
+				uint16_t max;	/**< maximum key size */
+				uint16_t increment;
+				/**< if a range of sizes are supported,
+				 * this parameter is used to indicate
+				 * increments in byte size that are supported
+				 * between the minimum and maximum */
+			} key_size;
+			/**< auth key size range */
+			struct {
+				uint16_t min;	/**< minimum digest size */
+				uint16_t max;	/**< maximum digest size */
+				uint16_t increment;
+				/**< if a range of sizes are supported,
+				 * this parameter is used to indicate
+				 * increments in byte size that are supported
+				 * between the minimum and maximum */
+			} digest_size;
+			/**< digest size range */
+			struct {
+				uint16_t min;	/**< minimum aad size */
+				uint16_t max;	/**< maximum aad size */
+				uint16_t increment;
+				/**< if a range of sizes are supported,
+				 * this parameter is used to indicate
+				 * increments in byte size that are supported
+				 * between the minimum and maximum */
+			} aad_size;
+			/**< Additional authentication data size range */
+		} auth;
+		/**< Symmetric Authentication transform capabilities */
+		struct {
+			enum rte_crypto_cipher_algorithm algo;
+			/**< cipher algorithm */
+			uint16_t block_size;
+			/**< algorithm block size */
+			struct {
+				uint16_t min;	/**< minimum key size */
+				uint16_t max;	/**< maximum key size */
+				uint16_t increment;
+				/**< if a range of sizes are supported,
+				 * this parameter is used to indicate
+				 * increments in byte size that are supported
+				 * between the minimum and maximum */
+			} key_size;
+			/**< cipher key size range */
+			struct {
+				uint16_t min;	/**< minimum iv size */
+				uint16_t max;	/**< maximum iv size */
+				uint16_t increment;
+				/**< if a range of sizes are supported,
+				 * this parameter is used to indicate
+				 * increments in byte size that are supported
+				 * between the minimum and maximum */
+			} iv_size;
+			/**< Initialisation vector data size range */
+		} cipher;
+		/**< Symmetric Cipher transform capabilities */
+	};
+};
+
+/** Structure used to capture a capability of a crypto device */
+struct rte_cryptodev_capabilities {
+	enum rte_crypto_op_type op;
+	/**< Operation type */
+
+	union {
+		struct rte_cryptodev_symmetric_capability sym;
+		/**< Symmetric operation capability parameters */
+	};
+};
+
+/** Macro used at end of crypto PMD list */
+#define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \
+	{ RTE_CRYPTO_OP_TYPE_UNDEFINED }
+
+
+/**
+ * Crypto device supported feature flags
+ *
+ * Note:
+ * New features flags should be added to the end of the list
+ *
+ * Keep these flags synchronised with rte_cryptodev_get_feature_name()
+ */
+#define	RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO	(1ULL << 0)
+/**< Symmetric crypto operations are supported */
+#define	RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO	(1ULL << 1)
+/**< Asymmetric crypto operations are supported */
+#define	RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING	(1ULL << 2)
+/**< Chaining symmetric crypto operations are supported */
+#define	RTE_CRYPTODEV_FF_CPU_SSE		(1ULL << 3)
+/**< Utilises CPU SIMD SSE instructions */
+#define	RTE_CRYPTODEV_FF_CPU_AVX		(1ULL << 4)
+/**< Utilises CPU SIMD AVX instructions */
+#define	RTE_CRYPTODEV_FF_CPU_AVX2		(1ULL << 5)
+/**< Utilises CPU SIMD AVX2 instructions */
+#define	RTE_CRYPTODEV_FF_CPU_AESNI		(1ULL << 6)
+/**< Utilises CPU AES-NI instructions */
+#define	RTE_CRYPTODEV_FF_HW_ACCELERATED		(1ULL << 7)
+/**< Operations are off-loaded to an external hardware accelerator */
+
+
+/**
+ * Get the name of a crypto device feature flag
+ *
+ * @param	flag	The mask describing the flag.
+ *
+ * @return
+ *   The name of this flag, or NULL if it's not a valid feature flag.
+ */
+
+extern const char *
+rte_cryptodev_get_feature_name(uint64_t flag);
+
+/**  Crypto device information */
+struct rte_cryptodev_info {
+	const char *driver_name;		/**< Driver name. */
+	enum rte_cryptodev_type dev_type;	/**< Device type */
+	struct rte_pci_device *pci_dev;		/**< PCI information. */
+
+	uint64_t feature_flags;			/**< Feature flags */
+
+	const struct rte_cryptodev_capabilities *capabilities;
+	/**< Array of devices supported capabilities */
+
+	unsigned max_nb_queue_pairs;
+	/**< Maximum number of queues pairs supported by device. */
+
+	struct {
+		unsigned max_nb_sessions;
+		/**< Maximum number of sessions supported by device. */
+	} sym;
+};
+
+#define RTE_CRYPTODEV_DETACHED  (0)
+#define RTE_CRYPTODEV_ATTACHED  (1)
+
+/** Definitions of Crypto device event types */
+enum rte_cryptodev_event_type {
+	RTE_CRYPTODEV_EVENT_UNKNOWN,	/**< unknown event type */
+	RTE_CRYPTODEV_EVENT_ERROR,	/**< error interrupt event */
+	RTE_CRYPTODEV_EVENT_MAX		/**< max value of this enum */
+};
+
+/** Crypto device queue pair configuration structure. */
+struct rte_cryptodev_qp_conf {
+	uint32_t nb_descriptors; /**< Number of descriptors per queue pair */
+};
+
+/**
+ * Typedef for application callback function to be registered by application
+ * software for notification of device events
+ *
+ * @param	dev_id	Crypto device identifier
+ * @param	event	Crypto device event to register for notification of.
+ * @param	cb_arg	User specified parameter to be passed as to passed to
+ *			users callback function.
+ */
+typedef void (*rte_cryptodev_cb_fn)(uint8_t dev_id,
+		enum rte_cryptodev_event_type event, void *cb_arg);
+
+
+/** Crypto Device statistics */
+struct rte_cryptodev_stats {
+	uint64_t enqueued_count;
+	/**< Count of all operations enqueued */
+	uint64_t dequeued_count;
+	/**< Count of all operations dequeued */
+
+	uint64_t enqueue_err_count;
+	/**< Total error count on operations enqueued */
+	uint64_t dequeue_err_count;
+	/**< Total error count on operations dequeued */
+};
+
+#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS	8
+#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS	2048
+
+/**
+ * @internal
+ * Initialisation parameters for virtual crypto devices
+ */
+struct rte_crypto_vdev_init_params {
+	unsigned max_nb_queue_pairs;
+	unsigned max_nb_sessions;
+	uint8_t socket_id;
+};
+
+#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG		("max_nb_queue_pairs")
+#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG		("max_nb_sessions")
+#define RTE_CRYPTODEV_VDEV_SOCKET_ID			("socket_id")
+
+static const char *cryptodev_vdev_valid_params[] = {
+	RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
+	RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
+	RTE_CRYPTODEV_VDEV_SOCKET_ID
+};
+
+static inline uint8_t
+number_of_sockets(void)
+{
+	int sockets = 0;
+	int i;
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+
+	for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
+		if (sockets < ms[i].socket_id)
+			sockets = ms[i].socket_id;
+	}
+
+	/* Number of sockets = maximum socket_id + 1 */
+	return ++sockets;
+}
+
+/** Parse integer from integer argument */
+static inline int
+__rte_cryptodev_parse_integer_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	int *i = (int *) extra_args;
+
+	*i = atoi(value);
+	if (*i < 0) {
+		CDEV_LOG_ERR("Argument has to be positive.");
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Parse virtual device initialisation parameters input arguments
+ * @internal
+ *
+ * @params	params		Initialisation parameters with defaults set.
+ * @params	input_args	Command line arguments
+ *
+ * @return
+ * 0 on successful parse
+ * <0 on failure to parse
+ */
+static inline int
+rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params,
+		const char *input_args)
+{
+	struct rte_kvargs *kvlist;
+	int ret;
+
+	if (params == NULL)
+		return -EINVAL;
+
+	if (input_args) {
+		kvlist = rte_kvargs_parse(input_args,
+				cryptodev_vdev_valid_params);
+		if (kvlist == NULL)
+			return -1;
+
+		ret = rte_kvargs_process(kvlist,
+					RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
+					&__rte_cryptodev_parse_integer_arg,
+					&params->max_nb_queue_pairs);
+		if (ret < 0)
+			goto free_kvlist;
+
+		ret = rte_kvargs_process(kvlist,
+					RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
+					&__rte_cryptodev_parse_integer_arg,
+					&params->max_nb_sessions);
+		if (ret < 0)
+			goto free_kvlist;
+
+		ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID,
+					&__rte_cryptodev_parse_integer_arg,
+					&params->socket_id);
+		if (ret < 0)
+			goto free_kvlist;
+
+		if (params->socket_id >= number_of_sockets()) {
+			CDEV_LOG_ERR("Invalid socket id specified to create "
+				"the virtual crypto device on");
+			goto free_kvlist;
+		}
+	}
+
+	return 0;
+
+free_kvlist:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+/**
+ * Create a virtual crypto device
+ *
+ * @param	name	Cryptodev PMD name of device to be created.
+ * @param	args	Options arguments for device.
+ *
+ * @return
+ * - On successful creation of the cryptodev the device index is returned,
+ *   which will be between 0 and rte_cryptodev_count().
+ * - In the case of a failure, returns -1.
+ */
+extern int
+rte_cryptodev_create_vdev(const char *name, const char *args);
+
+/**
+ * Get the device identifier for the named crypto device.
+ *
+ * @param	name	device name to select the device structure.
+ *
+ * @return
+ *   - Returns crypto device identifier on success.
+ *   - Return -1 on failure to find named crypto device.
+ */
+extern int
+rte_cryptodev_get_dev_id(const char *name);
+
+/**
+ * Get the total number of crypto devices that have been successfully
+ * initialised.
+ *
+ * @return
+ *   - The total number of usable crypto devices.
+ */
+extern uint8_t
+rte_cryptodev_count(void);
+
+extern uint8_t
+rte_cryptodev_count_devtype(enum rte_cryptodev_type type);
+/*
+ * Return the NUMA socket to which a device is connected
+ *
+ * @param dev_id
+ *   The identifier of the device
+ * @return
+ *   The NUMA socket id to which the device is connected or
+ *   a default of zero if the socket could not be determined.
+ *   -1 if returned is the dev_id value is out of range.
+ */
+extern int
+rte_cryptodev_socket_id(uint8_t dev_id);
+
+/** Crypto device configuration structure */
+struct rte_cryptodev_config {
+	int socket_id;			/**< Socket to allocate resources on */
+	uint16_t nb_queue_pairs;
+	/**< Number of queue pairs to configure on device */
+
+	struct {
+		uint32_t nb_objs;	/**< Number of objects in mempool */
+		uint32_t cache_size;	/**< l-core object cache size */
+	} session_mp;		/**< Session mempool configuration */
+};
+
+/**
+ * Configure a device.
+ *
+ * This function must be invoked first before any other function in the
+ * API. This function can also be re-invoked when a device is in the
+ * stopped state.
+ *
+ * @param	dev_id		The identifier of the device to configure.
+ * @param	config		The crypto device configuration structure.
+ *
+ * @return
+ *   - 0: Success, device configured.
+ *   - <0: Error code returned by the driver configuration function.
+ */
+extern int
+rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config);
+
+/**
+ * Start an device.
+ *
+ * The device start step is the last one and consists of setting the configured
+ * offload features and in starting the transmit and the receive units of the
+ * device.
+ * On success, all basic functions exported by the API (link status,
+ * receive/transmit, and so on) can be invoked.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @return
+ *   - 0: Success, device started.
+ *   - <0: Error code of the driver device start function.
+ */
+extern int
+rte_cryptodev_start(uint8_t dev_id);
+
+/**
+ * Stop an device. The device can be restarted with a call to
+ * rte_cryptodev_start()
+ *
+ * @param	dev_id		The identifier of the device.
+ */
+extern void
+rte_cryptodev_stop(uint8_t dev_id);
+
+/**
+ * Close an device. The device cannot be restarted!
+ *
+ * @param	dev_id		The identifier of the device.
+ *
+ * @return
+ *  - 0 on successfully closing device
+ *  - <0 on failure to close device
+ */
+extern int
+rte_cryptodev_close(uint8_t dev_id);
+
+/**
+ * Allocate and set up a receive queue pair for a device.
+ *
+ *
+ * @param	dev_id		The identifier of the device.
+ * @param	queue_pair_id	The index of the queue pairs to set up. The
+ *				value must be in the range [0, nb_queue_pair
+ *				- 1] previously supplied to
+ *				rte_cryptodev_configure().
+ * @param	qp_conf		The pointer to the configuration data to be
+ *				used for the queue pair. NULL value is
+ *				allowed, in which case default configuration
+ *				will be used.
+ * @param	socket_id	The *socket_id* argument is the socket
+ *				identifier in case of NUMA. The value can be
+ *				*SOCKET_ID_ANY* if there is no NUMA constraint
+ *				for the DMA memory allocated for the receive
+ *				queue pair.
+ *
+ * @return
+ *   - 0: Success, queue pair correctly set up.
+ *   - <0: Queue pair configuration failed
+ */
+extern int
+rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
+		const struct rte_cryptodev_qp_conf *qp_conf, int socket_id);
+
+/**
+ * Start a specified queue pair of a device. It is used
+ * when deferred_start flag of the specified queue is true.
+ *
+ * @param	dev_id		The identifier of the device
+ * @param	queue_pair_id	The index of the queue pair to start. The value
+ *				must be in the range [0, nb_queue_pair - 1]
+ *				previously supplied to
+ *				rte_crypto_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -EINVAL: The dev_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+extern int
+rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id);
+
+/**
+ * Stop specified queue pair of a device
+ *
+ * @param	dev_id		The identifier of the device
+ * @param	queue_pair_id	The index of the queue pair to stop. The value
+ *				must be in the range [0, nb_queue_pair - 1]
+ *				previously supplied to
+ *				rte_cryptodev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -EINVAL: The dev_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+extern int
+rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id);
+
+/**
+ * Get the number of queue pairs on a specific crypto device
+ *
+ * @param	dev_id		Crypto device identifier.
+ * @return
+ *   - The number of configured queue pairs.
+ */
+extern uint16_t
+rte_cryptodev_queue_pair_count(uint8_t dev_id);
+
+
+/**
+ * Retrieve the general I/O statistics of a device.
+ *
+ * @param	dev_id		The identifier of the device.
+ * @param	stats		A pointer to a structure of type
+ *				*rte_cryptodev_stats* to be filled with the
+ *				values of device counters.
+ * @return
+ *   - Zero if successful.
+ *   - Non-zero otherwise.
+ */
+extern int
+rte_cryptodev_stats_get(uint8_t dev_id, struct rte_cryptodev_stats *stats);
+
+/**
+ * Reset the general I/O statistics of a device.
+ *
+ * @param	dev_id		The identifier of the device.
+ */
+extern void
+rte_cryptodev_stats_reset(uint8_t dev_id);
+
+/**
+ * Retrieve the contextual information of a device.
+ *
+ * @param	dev_id		The identifier of the device.
+ * @param	dev_info	A pointer to a structure of type
+ *				*rte_cryptodev_info* to be filled with the
+ *				contextual information of the device.
+ */
+extern void
+rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info);
+
+
+/**
+ * Register a callback function for specific device id.
+ *
+ * @param	dev_id		Device id.
+ * @param	event		Event interested.
+ * @param	cb_fn		User supplied callback function to be called.
+ * @param	cb_arg		Pointer to the parameters for the registered
+ *				callback.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+extern int
+rte_cryptodev_callback_register(uint8_t dev_id,
+		enum rte_cryptodev_event_type event,
+		rte_cryptodev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * Unregister a callback function for specific device id.
+ *
+ * @param	dev_id		The device identifier.
+ * @param	event		Event interested.
+ * @param	cb_fn		User supplied callback function to be called.
+ * @param	cb_arg		Pointer to the parameters for the registered
+ *				callback.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+extern int
+rte_cryptodev_callback_unregister(uint8_t dev_id,
+		enum rte_cryptodev_event_type event,
+		rte_cryptodev_cb_fn cb_fn, void *cb_arg);
+
+
+typedef uint16_t (*dequeue_pkt_burst_t)(void *qp,
+		struct rte_crypto_op **ops,	uint16_t nb_ops);
+/**< Dequeue processed packets from queue pair of a device. */
+
+typedef uint16_t (*enqueue_pkt_burst_t)(void *qp,
+		struct rte_crypto_op **ops,	uint16_t nb_ops);
+/**< Enqueue packets for processing on queue pair of a device. */
+
+
+
+
+struct rte_cryptodev_callback;
+
+/** Structure to keep track of registered callbacks */
+TAILQ_HEAD(rte_cryptodev_cb_list, rte_cryptodev_callback);
+
+/** The data structure associated with each crypto device. */
+struct rte_cryptodev {
+	dequeue_pkt_burst_t dequeue_burst;
+	/**< Pointer to PMD receive function. */
+	enqueue_pkt_burst_t enqueue_burst;
+	/**< Pointer to PMD transmit function. */
+
+	const struct rte_cryptodev_driver *driver;
+	/**< Driver for this device */
+	struct rte_cryptodev_data *data;
+	/**< Pointer to device data */
+	struct rte_cryptodev_ops *dev_ops;
+	/**< Functions exported by PMD */
+	uint64_t feature_flags;
+	/**< Supported features */
+	struct rte_pci_device *pci_dev;
+	/**< PCI info. supplied by probing */
+
+	enum rte_cryptodev_type dev_type;
+	/**< Crypto device type */
+	enum pmd_type pmd_type;
+	/**< PMD type - PDEV / VDEV */
+
+	struct rte_cryptodev_cb_list link_intr_cbs;
+	/**< User application callback for interrupts if present */
+
+	uint8_t attached : 1;
+	/**< Flag indicating the device is attached */
+} __rte_cache_aligned;
+
+
+#define RTE_CRYPTODEV_NAME_MAX_LEN	(64)
+/**< Max length of name of crypto PMD */
+
+/**
+ *
+ * The data part, with no function pointers, associated with each device.
+ *
+ * This structure is safe to place in shared memory to be common among
+ * different processes in a multi-process configuration.
+ */
+struct rte_cryptodev_data {
+	uint8_t dev_id;
+	/**< Device ID for this instance */
+	uint8_t socket_id;
+	/**< Socket ID where memory is allocated */
+	char name[RTE_CRYPTODEV_NAME_MAX_LEN];
+	/**< Unique identifier name */
+
+	uint8_t dev_started : 1;
+	/**< Device state: STARTED(1)/STOPPED(0) */
+
+	struct rte_mempool *session_pool;
+	/**< Session memory pool */
+	void **queue_pairs;
+	/**< Array of pointers to queue pairs. */
+	uint16_t nb_queue_pairs;
+	/**< Number of device queue pairs. */
+
+	void *dev_private;
+	/**< PMD-specific private data */
+} __rte_cache_aligned;
+
+extern struct rte_cryptodev *rte_cryptodevs;
+/**
+ *
+ * Dequeue a burst of processed crypto operations from a queue on the crypto
+ * device. The dequeued operation are stored in *rte_crypto_op* structures
+ * whose pointers are supplied in the *ops* array.
+ *
+ * The rte_cryptodev_dequeue_burst() function returns the number of ops
+ * actually dequeued, which is the number of *rte_crypto_op* data structures
+ * effectively supplied into the *ops* array.
+ *
+ * A return value equal to *nb_ops* indicates that the queue contained
+ * at least *nb_ops* operations, and this is likely to signify that other
+ * processed operations remain in the devices output queue. Applications
+ * implementing a "retrieve as many processed operations as possible" policy
+ * can check this specific case and keep invoking the
+ * rte_cryptodev_dequeue_burst() function until a value less than
+ * *nb_ops* is returned.
+ *
+ * The rte_cryptodev_dequeue_burst() function does not provide any error
+ * notification to avoid the corresponding overhead.
+ *
+ * @param	dev_id		The symmetric crypto device identifier
+ * @param	qp_id		The index of the queue pair from which to
+ *				retrieve processed packets. The value must be
+ *				in the range [0, nb_queue_pair - 1] previously
+ *				supplied to rte_cryptodev_configure().
+ * @param	ops		The address of an array of pointers to
+ *				*rte_crypto_op* structures that must be
+ *				large enough to store *nb_ops* pointers in it.
+ * @param	nb_ops		The maximum number of operations to dequeue.
+ *
+ * @return
+ *   - The number of operations actually dequeued, which is the number
+ *   of pointers to *rte_crypto_op* structures effectively supplied to the
+ *   *ops* array.
+ */
+static inline uint16_t
+rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id,
+		struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+	struct rte_cryptodev *dev = &rte_cryptodevs[dev_id];
+
+	nb_ops = (*dev->dequeue_burst)
+			(dev->data->queue_pairs[qp_id], ops, nb_ops);
+
+	return nb_ops;
+}
+
+/**
+ * Enqueue a burst of operations for processing on a crypto device.
+ *
+ * The rte_cryptodev_enqueue_burst() function is invoked to place
+ * crypto operations on the queue *qp_id* of the device designated by
+ * its *dev_id*.
+ *
+ * The *nb_ops* parameter is the number of operations to process which are
+ * supplied in the *ops* array of *rte_crypto_op* structures.
+ *
+ * The rte_cryptodev_enqueue_burst() function returns the number of
+ * operations it actually enqueued for processing. A return value equal to
+ * *nb_ops* means that all packets have been enqueued.
+ *
+ * @param	dev_id		The identifier of the device.
+ * @param	qp_id		The index of the queue pair which packets are
+ *				to be enqueued for processing. The value
+ *				must be in the range [0, nb_queue_pairs - 1]
+ *				previously supplied to
+ *				 *rte_cryptodev_configure*.
+ * @param	ops		The address of an array of *nb_ops* pointers
+ *				to *rte_crypto_op* structures which contain
+ *				the crypto operations to be processed.
+ * @param	nb_ops		The number of operations to process.
+ *
+ * @return
+ * The number of operations actually enqueued on the crypto device. The return
+ * value can be less than the value of the *nb_ops* parameter when the
+ * crypto devices queue is full or if invalid parameters are specified in
+ * a *rte_crypto_op*.
+ */
+static inline uint16_t
+rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
+		struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+	struct rte_cryptodev *dev = &rte_cryptodevs[dev_id];
+
+	return (*dev->enqueue_burst)(
+			dev->data->queue_pairs[qp_id], ops, nb_ops);
+}
+
+
+/** Cryptodev symmetric crypto session */
+struct rte_cryptodev_sym_session {
+	struct {
+		uint8_t dev_id;
+		/**< Device Id */
+		enum rte_cryptodev_type dev_type;
+		/** Crypto Device type session created on */
+		struct rte_mempool *mp;
+		/**< Mempool session allocated from */
+	} __rte_aligned(8);
+	/**< Public symmetric session details */
+
+	char _private[0];
+	/**< Private session material */
+};
+
+
+/**
+ * Initialise a session for symmetric cryptographic operations.
+ *
+ * This function is used by the client to initialize immutable
+ * parameters of symmetric cryptographic operation.
+ * To perform the operation the rte_cryptodev_enqueue_burst function is
+ * used.  Each mbuf should contain a reference to the session
+ * pointer returned from this function contained within it's crypto_op if a
+ * session-based operation is being provisioned. Memory to contain the session
+ * information is allocated from within mempool managed by the cryptodev.
+ *
+ * The rte_cryptodev_session_free must be called to free allocated
+ * memory when the session is no longer required.
+ *
+ * @param	dev_id		The device identifier.
+ * @param	xform		Crypto transform chain.
+
+ *
+ * @return
+ *  Pointer to the created session or NULL
+ */
+extern struct rte_cryptodev_sym_session *
+rte_cryptodev_sym_session_create(uint8_t dev_id,
+		struct rte_crypto_sym_xform *xform);
+
+/**
+ * Free the memory associated with a previously allocated session.
+ *
+ * @param	dev_id		The device identifier.
+ * @param	session		Session pointer previously allocated by
+ *				*rte_cryptodev_sym_session_create*.
+ *
+ * @return
+ *   NULL on successful freeing of session.
+ *   Session pointer on failure to free session.
+ */
+extern struct rte_cryptodev_sym_session *
+rte_cryptodev_sym_session_free(uint8_t dev_id,
+		struct rte_cryptodev_sym_session *session);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTODEV_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
new file mode 100644
index 00000000..7d049ea3
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -0,0 +1,543 @@
+/*-
+ *
+ *   Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRYPTODEV_PMD_H_
+#define _RTE_CRYPTODEV_PMD_H_
+
+/** @file
+ * RTE Crypto PMD APIs
+ *
+ * @note
+ * These API are from crypto PMD only and user applications should not call
+ * them directly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_log.h>
+
+#include "rte_crypto.h"
+#include "rte_cryptodev.h"
+
+
+#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
+#define RTE_PMD_DEBUG_TRACE(...) \
+	rte_pmd_debug_trace(__func__, __VA_ARGS__)
+#else
+#define RTE_PMD_DEBUG_TRACE(fmt, args...)
+#endif
+
+struct rte_cryptodev_session {
+	struct {
+		uint8_t dev_id;
+		enum rte_cryptodev_type type;
+		struct rte_mempool *mp;
+	} __rte_aligned(8);
+
+	char _private[0];
+};
+
+struct rte_cryptodev_driver;
+
+/**
+ * Initialisation function of a crypto driver invoked for each matching
+ * crypto PCI device detected during the PCI probing phase.
+ *
+ * @param	drv	The pointer to the [matching] crypto driver structure
+ *			supplied by the PMD when it registered itself.
+ * @param	dev	The dev pointer is the address of the *rte_cryptodev*
+ *			structure associated with the matching device and which
+ *			has been [automatically] allocated in the
+ *			*rte_crypto_devices* array.
+ *
+ * @return
+ *   - 0: Success, the device is properly initialised by the driver.
+ *        In particular, the driver MUST have set up the *dev_ops* pointer
+ *        of the *dev* structure.
+ *   - <0: Error code of the device initialisation failure.
+ */
+typedef int (*cryptodev_init_t)(struct rte_cryptodev_driver *drv,
+		struct rte_cryptodev *dev);
+
+/**
+ * Finalisation function of a driver invoked for each matching
+ * PCI device detected during the PCI closing phase.
+ *
+ * @param	drv	The pointer to the [matching] driver structure supplied
+ *			by the PMD when it registered itself.
+ * @param	dev	The dev pointer is the address of the *rte_cryptodev*
+ *			structure associated with the matching device and which
+ *			has been [automatically] allocated in the
+ *			*rte_crypto_devices* array.
+ *
+ *  * @return
+ *   - 0: Success, the device is properly finalised by the driver.
+ *        In particular, the driver MUST free the *dev_ops* pointer
+ *        of the *dev* structure.
+ *   - <0: Error code of the device initialisation failure.
+ */
+typedef int (*cryptodev_uninit_t)(const struct rte_cryptodev_driver  *drv,
+				struct rte_cryptodev *dev);
+
+/**
+ * The structure associated with a PMD driver.
+ *
+ * Each driver acts as a PCI driver and is represented by a generic
+ * *crypto_driver* structure that holds:
+ *
+ * - An *rte_pci_driver* structure (which must be the first field).
+ *
+ * - The *cryptodev_init* function invoked for each matching PCI device.
+ *
+ * - The size of the private data to allocate for each matching device.
+ */
+struct rte_cryptodev_driver {
+	struct rte_pci_driver pci_drv;	/**< The PMD is also a PCI driver. */
+	unsigned dev_private_size;	/**< Size of device private data. */
+
+	cryptodev_init_t cryptodev_init;	/**< Device init function. */
+	cryptodev_uninit_t cryptodev_uninit;	/**< Device uninit function. */
+};
+
+
+/** Global structure used for maintaining state of allocated crypto devices */
+struct rte_cryptodev_global {
+	struct rte_cryptodev *devs;	/**< Device information array */
+	struct rte_cryptodev_data *data[RTE_CRYPTO_MAX_DEVS];
+	/**< Device private data */
+	uint8_t nb_devs;		/**< Number of devices found */
+	uint8_t max_devs;		/**< Max number of devices */
+};
+
+/** pointer to global crypto devices data structure. */
+extern struct rte_cryptodev_global *rte_cryptodev_globals;
+
+/**
+ * Get the rte_cryptodev structure device pointer for the device. Assumes a
+ * valid device index.
+ *
+ * @param	dev_id	Device ID value to select the device structure.
+ *
+ * @return
+ *   - The rte_cryptodev structure pointer for the given device ID.
+ */
+static inline struct rte_cryptodev *
+rte_cryptodev_pmd_get_dev(uint8_t dev_id)
+{
+	return &rte_cryptodev_globals->devs[dev_id];
+}
+
+/**
+ * Get the rte_cryptodev structure device pointer for the named device.
+ *
+ * @param	name	device name to select the device structure.
+ *
+ * @return
+ *   - The rte_cryptodev structure pointer for the given device ID.
+ */
+static inline struct rte_cryptodev *
+rte_cryptodev_pmd_get_named_dev(const char *name)
+{
+	struct rte_cryptodev *dev;
+	unsigned i;
+
+	if (name == NULL)
+		return NULL;
+
+	for (i = 0, dev = &rte_cryptodev_globals->devs[i];
+			i < rte_cryptodev_globals->max_devs; i++) {
+		if ((dev->attached == RTE_CRYPTODEV_ATTACHED) &&
+				(strcmp(dev->data->name, name) == 0))
+			return dev;
+	}
+
+	return NULL;
+}
+
+/**
+ * Validate if the crypto device index is valid attached crypto device.
+ *
+ * @param	dev_id	Crypto device index.
+ *
+ * @return
+ *   - If the device index is valid (1) or not (0).
+ */
+static inline unsigned
+rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id)
+{
+	struct rte_cryptodev *dev = NULL;
+
+	if (dev_id >= rte_cryptodev_globals->nb_devs)
+		return 0;
+
+	dev = rte_cryptodev_pmd_get_dev(dev_id);
+	if (dev->attached != RTE_CRYPTODEV_ATTACHED)
+		return 0;
+	else
+		return 1;
+}
+
+/**
+ * The pool of rte_cryptodev structures.
+ */
+extern struct rte_cryptodev *rte_cryptodevs;
+
+
+/**
+ * Definitions of all functions exported by a driver through the
+ * the generic structure of type *crypto_dev_ops* supplied in the
+ * *rte_cryptodev* structure associated with a device.
+ */
+
+/**
+ *	Function used to configure device.
+ *
+ * @param	dev	Crypto device pointer
+ *
+ * @return	Returns 0 on success
+ */
+typedef int (*cryptodev_configure_t)(struct rte_cryptodev *dev);
+
+/**
+ * Function used to start a configured device.
+ *
+ * @param	dev	Crypto device pointer
+ *
+ * @return	Returns 0 on success
+ */
+typedef int (*cryptodev_start_t)(struct rte_cryptodev *dev);
+
+/**
+ * Function used to stop a configured device.
+ *
+ * @param	dev	Crypto device pointer
+ */
+typedef void (*cryptodev_stop_t)(struct rte_cryptodev *dev);
+
+/**
+ * Function used to close a configured device.
+ *
+ * @param	dev	Crypto device pointer
+ * @return
+ * - 0 on success.
+ * - EAGAIN if can't close as device is busy
+ */
+typedef int (*cryptodev_close_t)(struct rte_cryptodev *dev);
+
+
+/**
+ * Function used to get statistics of a device.
+ *
+ * @param	dev	Crypto device pointer
+ * @param	stats	Pointer to crypto device stats structure to populate
+ */
+typedef void (*cryptodev_stats_get_t)(struct rte_cryptodev *dev,
+				struct rte_cryptodev_stats *stats);
+
+
+/**
+ * Function used to reset statistics of a device.
+ *
+ * @param	dev	Crypto device pointer
+ */
+typedef void (*cryptodev_stats_reset_t)(struct rte_cryptodev *dev);
+
+
+/**
+ * Function used to get specific information of a device.
+ *
+ * @param	dev	Crypto device pointer
+ */
+typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev,
+				struct rte_cryptodev_info *dev_info);
+
+/**
+ * Start queue pair of a device.
+ *
+ * @param	dev	Crypto device pointer
+ * @param	qp_id	Queue Pair Index
+ *
+ * @return	Returns 0 on success.
+ */
+typedef int (*cryptodev_queue_pair_start_t)(struct rte_cryptodev *dev,
+				uint16_t qp_id);
+
+/**
+ * Stop queue pair of a device.
+ *
+ * @param	dev	Crypto device pointer
+ * @param	qp_id	Queue Pair Index
+ *
+ * @return	Returns 0 on success.
+ */
+typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev,
+				uint16_t qp_id);
+
+/**
+ * Setup a queue pair for a device.
+ *
+ * @param	dev		Crypto device pointer
+ * @param	qp_id		Queue Pair Index
+ * @param	qp_conf		Queue configuration structure
+ * @param	socket_id	Socket Index
+ *
+ * @return	Returns 0 on success.
+ */
+typedef int (*cryptodev_queue_pair_setup_t)(struct rte_cryptodev *dev,
+		uint16_t qp_id,	const struct rte_cryptodev_qp_conf *qp_conf,
+		int socket_id);
+
+/**
+ * Release memory resources allocated by given queue pair.
+ *
+ * @param	dev	Crypto device pointer
+ * @param	qp_id	Queue Pair Index
+ *
+ * @return
+ * - 0 on success.
+ * - EAGAIN if can't close as device is busy
+ */
+typedef int (*cryptodev_queue_pair_release_t)(struct rte_cryptodev *dev,
+		uint16_t qp_id);
+
+/**
+ * Get number of available queue pairs of a device.
+ *
+ * @param	dev	Crypto device pointer
+ *
+ * @return	Returns number of queue pairs on success.
+ */
+typedef uint32_t (*cryptodev_queue_pair_count_t)(struct rte_cryptodev *dev);
+
+/**
+ * Create a session mempool to allocate sessions from
+ *
+ * @param	dev		Crypto device pointer
+ * @param	nb_objs		number of sessions objects in mempool
+ * @param	obj_cache	l-core object cache size, see *rte_ring_create*
+ * @param	socket_id	Socket Id to allocate  mempool on.
+ *
+ * @return
+ * - On success returns a pointer to a rte_mempool
+ * - On failure returns a NULL pointer
+ */
+typedef int (*cryptodev_sym_create_session_pool_t)(
+		struct rte_cryptodev *dev, unsigned nb_objs,
+		unsigned obj_cache_size, int socket_id);
+
+
+/**
+ * Get the size of a cryptodev session
+ *
+ * @param	dev		Crypto device pointer
+ *
+ * @return
+ *  - On success returns the size of the session structure for device
+ *  - On failure returns 0
+ */
+typedef unsigned (*cryptodev_sym_get_session_private_size_t)(
+		struct rte_cryptodev *dev);
+
+/**
+ * Initialize a Crypto session on a device.
+ *
+ * @param	dev		Crypto device pointer
+ * @param	xform		Single or chain of crypto xforms
+ * @param	priv_sess	Pointer to cryptodev's private session structure
+ *
+ * @return
+ *  - Returns private session structure on success.
+ *  - Returns NULL on failure.
+ */
+typedef void (*cryptodev_sym_initialize_session_t)(struct rte_mempool *mempool,
+		void *session_private);
+
+/**
+ * Configure a Crypto session on a device.
+ *
+ * @param	dev		Crypto device pointer
+ * @param	xform		Single or chain of crypto xforms
+ * @param	priv_sess	Pointer to cryptodev's private session structure
+ *
+ * @return
+ *  - Returns private session structure on success.
+ *  - Returns NULL on failure.
+ */
+typedef void * (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
+		struct rte_crypto_sym_xform *xform, void *session_private);
+
+/**
+ * Free Crypto session.
+ * @param	session		Cryptodev session structure to free
+ */
+typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev,
+		void *session_private);
+
+
+/** Crypto device operations function pointer table */
+struct rte_cryptodev_ops {
+	cryptodev_configure_t dev_configure;	/**< Configure device. */
+	cryptodev_start_t dev_start;		/**< Start device. */
+	cryptodev_stop_t dev_stop;		/**< Stop device. */
+	cryptodev_close_t dev_close;		/**< Close device. */
+
+	cryptodev_info_get_t dev_infos_get;	/**< Get device info. */
+
+	cryptodev_stats_get_t stats_get;
+	/**< Get device statistics. */
+	cryptodev_stats_reset_t stats_reset;
+	/**< Reset device statistics. */
+
+	cryptodev_queue_pair_setup_t queue_pair_setup;
+	/**< Set up a device queue pair. */
+	cryptodev_queue_pair_release_t queue_pair_release;
+	/**< Release a queue pair. */
+	cryptodev_queue_pair_start_t queue_pair_start;
+	/**< Start a queue pair. */
+	cryptodev_queue_pair_stop_t queue_pair_stop;
+	/**< Stop a queue pair. */
+	cryptodev_queue_pair_count_t queue_pair_count;
+	/**< Get count of the queue pairs. */
+
+	cryptodev_sym_get_session_private_size_t session_get_size;
+	/**< Return private session. */
+	cryptodev_sym_initialize_session_t session_initialize;
+	/**< Initialization function for private session data */
+	cryptodev_sym_configure_session_t session_configure;
+	/**< Configure a Crypto session. */
+	cryptodev_sym_free_session_t session_clear;
+	/**< Clear a Crypto sessions private data. */
+};
+
+
+/**
+ * Function for internal use by dummy drivers primarily, e.g. ring-based
+ * driver.
+ * Allocates a new cryptodev slot for an crypto device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param	name		Unique identifier name for each device
+ * @param	type		Device type of this Crypto device
+ * @param	socket_id	Socket to allocate resources on.
+ * @return
+ *   - Slot in the rte_dev_devices array for a new device;
+ */
+struct rte_cryptodev *
+rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id);
+
+/**
+ * Creates a new virtual crypto device and returns the pointer
+ * to that device.
+ *
+ * @param	name			PMD type name
+ * @param	dev_private_size	Size of crypto PMDs private data
+ * @param	socket_id		Socket to allocate resources on.
+ *
+ * @return
+ *   - Cryptodev pointer if device is successfully created.
+ *   - NULL if device cannot be created.
+ */
+struct rte_cryptodev *
+rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
+		int socket_id);
+
+
+/**
+ * Function for internal use by dummy drivers primarily, e.g. ring-based
+ * driver.
+ * Release the specified cryptodev device.
+ *
+ * @param cryptodev
+ * The *cryptodev* pointer is the address of the *rte_cryptodev* structure.
+ * @return
+ *   - 0 on success, negative on error
+ */
+extern int
+rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev);
+
+
+/**
+ * Register a Crypto [Poll Mode] driver.
+ *
+ * Function invoked by the initialization function of a Crypto driver
+ * to simultaneously register itself as Crypto Poll Mode Driver and to either:
+ *
+ *	a - register itself as PCI driver if the crypto device is a physical
+ *		device, by invoking the rte_eal_pci_register() function to
+ *		register the *pci_drv* structure embedded in the *crypto_drv*
+ *		structure, after having stored the address of the
+ *		rte_cryptodev_init() function in the *devinit* field of the
+ *		*pci_drv* structure.
+ *
+ *		During the PCI probing phase, the rte_cryptodev_init()
+ *		function is invoked for each PCI [device] matching the
+ *		embedded PCI identifiers provided by the driver.
+ *
+ *	b, complete the initialization sequence if the device is a virtual
+ *		device by calling the rte_cryptodev_init() directly passing a
+ *		NULL parameter for the rte_pci_device structure.
+ *
+ *   @param crypto_drv	crypto_driver structure associated with the crypto
+ *					driver.
+ *   @param type		pmd type
+ */
+extern int
+rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv,
+		enum pmd_type type);
+
+/**
+ * Executes all the user application registered callbacks for the specific
+ * device.
+ *  *
+ * @param	dev	Pointer to cryptodev struct
+ * @param	event	Crypto device interrupt event type.
+ *
+ * @return
+ *  void
+ */
+void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
+				enum rte_cryptodev_event_type event);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTODEV_PMD_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
new file mode 100644
index 00000000..41004e1c
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -0,0 +1,34 @@
+DPDK_16.04 {
+	global:
+
+	rte_cryptodevs;
+	rte_cryptodev_callback_register;
+	rte_cryptodev_callback_unregister;
+	rte_cryptodev_close;
+	rte_cryptodev_count;
+	rte_cryptodev_count_devtype;
+	rte_cryptodev_configure;
+	rte_cryptodev_create_vdev;
+	rte_cryptodev_get_dev_id;
+	rte_cryptodev_get_feature_name;
+	rte_cryptodev_info_get;
+	rte_cryptodev_pmd_allocate;
+	rte_cryptodev_pmd_callback_process;
+	rte_cryptodev_pmd_driver_register;
+	rte_cryptodev_pmd_release_device;
+	rte_cryptodev_pmd_virtual_dev_init;
+	rte_cryptodev_sym_session_create;
+	rte_cryptodev_sym_session_free;
+	rte_cryptodev_socket_id;
+	rte_cryptodev_start;
+	rte_cryptodev_stats_get;
+	rte_cryptodev_stats_reset;
+	rte_cryptodev_stop;
+	rte_cryptodev_queue_pair_count;
+	rte_cryptodev_queue_pair_setup;
+	rte_cryptodev_queue_pair_start;
+	rte_cryptodev_queue_pair_stop;
+	rte_crypto_op_pool_create;
+
+	local: *;
+};
diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
new file mode 100644
index 00000000..4c9af172
--- /dev/null
+++ b/lib/librte_distributor/Makefile
@@ -0,0 +1,54 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_distributor.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_distributor_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += lib/librte_mbuf
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
new file mode 100644
index 00000000..f3f778c9
--- /dev/null
+++ b/lib/librte_distributor/rte_distributor.c
@@ -0,0 +1,487 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <sys/queue.h>
+#include <string.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_eal_memconfig.h>
+#include "rte_distributor.h"
+
+#define NO_FLAGS 0
+#define RTE_DISTRIB_PREFIX "DT_"
+
+/* we will use the bottom four bits of pointer for flags, shifting out
+ * the top four bits to make room (since a 64-bit pointer actually only uses
+ * 48 bits). An arithmetic-right-shift will then appropriately restore the
+ * original pointer value with proper sign extension into the top bits. */
+#define RTE_DISTRIB_FLAG_BITS 4
+#define RTE_DISTRIB_FLAGS_MASK (0x0F)
+#define RTE_DISTRIB_NO_BUF 0       /**< empty flags: no buffer requested */
+#define RTE_DISTRIB_GET_BUF (1)    /**< worker requests a buffer, returns old */
+#define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */
+
+#define RTE_DISTRIB_BACKLOG_SIZE 8
+#define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1)
+
+#define RTE_DISTRIB_MAX_RETURNS 128
+#define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1)
+
+/**
+ * Maximum number of workers allowed.
+ * Be aware of increasing the limit, becaus it is limited by how we track
+ * in-flight tags. See @in_flight_bitmask and @rte_distributor_process
+ */
+#define RTE_DISTRIB_MAX_WORKERS	64
+
+/**
+ * Buffer structure used to pass the pointer data between cores. This is cache
+ * line aligned, but to improve performance and prevent adjacent cache-line
+ * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
+ * the next cache line to worker 0, we pad this out to three cache lines.
+ * Only 64-bits of the memory is actually used though.
+ */
+union rte_distributor_buffer {
+	volatile int64_t bufptr64;
+	char pad[RTE_CACHE_LINE_SIZE*3];
+} __rte_cache_aligned;
+
+struct rte_distributor_backlog {
+	unsigned start;
+	unsigned count;
+	int64_t pkts[RTE_DISTRIB_BACKLOG_SIZE];
+};
+
+struct rte_distributor_returned_pkts {
+	unsigned start;
+	unsigned count;
+	struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS];
+};
+
+struct rte_distributor {
+	TAILQ_ENTRY(rte_distributor) next;    /**< Next in list. */
+
+	char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the ring. */
+	unsigned num_workers;                 /**< Number of workers polling */
+
+	uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS];
+		/**< Tracks the tag being processed per core */
+	uint64_t in_flight_bitmask;
+		/**< on/off bits for in-flight tags.
+		 * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then
+		 * the bitmask has to expand.
+		 */
+
+	struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS];
+
+	union rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS];
+
+	struct rte_distributor_returned_pkts returns;
+};
+
+TAILQ_HEAD(rte_distributor_list, rte_distributor);
+
+static struct rte_tailq_elem rte_distributor_tailq = {
+	.name = "RTE_DISTRIBUTOR",
+};
+EAL_REGISTER_TAILQ(rte_distributor_tailq)
+
+/**** APIs called by workers ****/
+
+void
+rte_distributor_request_pkt(struct rte_distributor *d,
+		unsigned worker_id, struct rte_mbuf *oldpkt)
+{
+	union rte_distributor_buffer *buf = &d->bufs[worker_id];
+	int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
+			| RTE_DISTRIB_GET_BUF;
+	while (unlikely(buf->bufptr64 & RTE_DISTRIB_FLAGS_MASK))
+		rte_pause();
+	buf->bufptr64 = req;
+}
+
+struct rte_mbuf *
+rte_distributor_poll_pkt(struct rte_distributor *d,
+		unsigned worker_id)
+{
+	union rte_distributor_buffer *buf = &d->bufs[worker_id];
+	if (buf->bufptr64 & RTE_DISTRIB_GET_BUF)
+		return NULL;
+
+	/* since bufptr64 is signed, this should be an arithmetic shift */
+	int64_t ret = buf->bufptr64 >> RTE_DISTRIB_FLAG_BITS;
+	return (struct rte_mbuf *)((uintptr_t)ret);
+}
+
+struct rte_mbuf *
+rte_distributor_get_pkt(struct rte_distributor *d,
+		unsigned worker_id, struct rte_mbuf *oldpkt)
+{
+	struct rte_mbuf *ret;
+	rte_distributor_request_pkt(d, worker_id, oldpkt);
+	while ((ret = rte_distributor_poll_pkt(d, worker_id)) == NULL)
+		rte_pause();
+	return ret;
+}
+
+int
+rte_distributor_return_pkt(struct rte_distributor *d,
+		unsigned worker_id, struct rte_mbuf *oldpkt)
+{
+	union rte_distributor_buffer *buf = &d->bufs[worker_id];
+	uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
+			| RTE_DISTRIB_RETURN_BUF;
+	buf->bufptr64 = req;
+	return 0;
+}
+
+/**** APIs called on distributor core ***/
+
+/* as name suggests, adds a packet to the backlog for a particular worker */
+static int
+add_to_backlog(struct rte_distributor_backlog *bl, int64_t item)
+{
+	if (bl->count == RTE_DISTRIB_BACKLOG_SIZE)
+		return -1;
+
+	bl->pkts[(bl->start + bl->count++) & (RTE_DISTRIB_BACKLOG_MASK)]
+			= item;
+	return 0;
+}
+
+/* takes the next packet for a worker off the backlog */
+static int64_t
+backlog_pop(struct rte_distributor_backlog *bl)
+{
+	bl->count--;
+	return bl->pkts[bl->start++ & RTE_DISTRIB_BACKLOG_MASK];
+}
+
+/* stores a packet returned from a worker inside the returns array */
+static inline void
+store_return(uintptr_t oldbuf, struct rte_distributor *d,
+		unsigned *ret_start, unsigned *ret_count)
+{
+	/* store returns in a circular buffer - code is branch-free */
+	d->returns.mbufs[(*ret_start + *ret_count) & RTE_DISTRIB_RETURNS_MASK]
+			= (void *)oldbuf;
+	*ret_start += (*ret_count == RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf);
+	*ret_count += (*ret_count != RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf);
+}
+
+static inline void
+handle_worker_shutdown(struct rte_distributor *d, unsigned wkr)
+{
+	d->in_flight_tags[wkr] = 0;
+	d->in_flight_bitmask &= ~(1UL << wkr);
+	d->bufs[wkr].bufptr64 = 0;
+	if (unlikely(d->backlog[wkr].count != 0)) {
+		/* On return of a packet, we need to move the
+		 * queued packets for this core elsewhere.
+		 * Easiest solution is to set things up for
+		 * a recursive call. That will cause those
+		 * packets to be queued up for the next free
+		 * core, i.e. it will return as soon as a
+		 * core becomes free to accept the first
+		 * packet, as subsequent ones will be added to
+		 * the backlog for that core.
+		 */
+		struct rte_mbuf *pkts[RTE_DISTRIB_BACKLOG_SIZE];
+		unsigned i;
+		struct rte_distributor_backlog *bl = &d->backlog[wkr];
+
+		for (i = 0; i < bl->count; i++) {
+			unsigned idx = (bl->start + i) &
+					RTE_DISTRIB_BACKLOG_MASK;
+			pkts[i] = (void *)((uintptr_t)(bl->pkts[idx] >>
+					RTE_DISTRIB_FLAG_BITS));
+		}
+		/* recursive call.
+		 * Note that the tags were set before first level call
+		 * to rte_distributor_process.
+		 */
+		rte_distributor_process(d, pkts, i);
+		bl->count = bl->start = 0;
+	}
+}
+
+/* this function is called when process() fn is called without any new
+ * packets. It goes through all the workers and clears any returned packets
+ * to do a partial flush.
+ */
+static int
+process_returns(struct rte_distributor *d)
+{
+	unsigned wkr;
+	unsigned flushed = 0;
+	unsigned ret_start = d->returns.start,
+			ret_count = d->returns.count;
+
+	for (wkr = 0; wkr < d->num_workers; wkr++) {
+
+		const int64_t data = d->bufs[wkr].bufptr64;
+		uintptr_t oldbuf = 0;
+
+		if (data & RTE_DISTRIB_GET_BUF) {
+			flushed++;
+			if (d->backlog[wkr].count)
+				d->bufs[wkr].bufptr64 =
+						backlog_pop(&d->backlog[wkr]);
+			else {
+				d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF;
+				d->in_flight_tags[wkr] = 0;
+				d->in_flight_bitmask &= ~(1UL << wkr);
+			}
+			oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+		} else if (data & RTE_DISTRIB_RETURN_BUF) {
+			handle_worker_shutdown(d, wkr);
+			oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+		}
+
+		store_return(oldbuf, d, &ret_start, &ret_count);
+	}
+
+	d->returns.start = ret_start;
+	d->returns.count = ret_count;
+
+	return flushed;
+}
+
+/* process a set of packets to distribute them to workers */
+int
+rte_distributor_process(struct rte_distributor *d,
+		struct rte_mbuf **mbufs, unsigned num_mbufs)
+{
+	unsigned next_idx = 0;
+	unsigned wkr = 0;
+	struct rte_mbuf *next_mb = NULL;
+	int64_t next_value = 0;
+	uint32_t new_tag = 0;
+	unsigned ret_start = d->returns.start,
+			ret_count = d->returns.count;
+
+	if (unlikely(num_mbufs == 0))
+		return process_returns(d);
+
+	while (next_idx < num_mbufs || next_mb != NULL) {
+
+		int64_t data = d->bufs[wkr].bufptr64;
+		uintptr_t oldbuf = 0;
+
+		if (!next_mb) {
+			next_mb = mbufs[next_idx++];
+			next_value = (((int64_t)(uintptr_t)next_mb)
+					<< RTE_DISTRIB_FLAG_BITS);
+			/*
+			 * User is advocated to set tag vaue for each
+			 * mbuf before calling rte_distributor_process.
+			 * User defined tags are used to identify flows,
+			 * or sessions.
+			 */
+			new_tag = next_mb->hash.usr;
+
+			/*
+			 * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64
+			 * then the size of match has to be expanded.
+			 */
+			uint64_t match = 0;
+			unsigned i;
+			/*
+			 * to scan for a match use "xor" and "not" to get a 0/1
+			 * value, then use shifting to merge to single "match"
+			 * variable, where a one-bit indicates a match for the
+			 * worker given by the bit-position
+			 */
+			for (i = 0; i < d->num_workers; i++)
+				match |= (!(d->in_flight_tags[i] ^ new_tag)
+					<< i);
+
+			/* Only turned-on bits are considered as match */
+			match &= d->in_flight_bitmask;
+
+			if (match) {
+				next_mb = NULL;
+				unsigned worker = __builtin_ctzl(match);
+				if (add_to_backlog(&d->backlog[worker],
+						next_value) < 0)
+					next_idx--;
+			}
+		}
+
+		if ((data & RTE_DISTRIB_GET_BUF) &&
+				(d->backlog[wkr].count || next_mb)) {
+
+			if (d->backlog[wkr].count)
+				d->bufs[wkr].bufptr64 =
+						backlog_pop(&d->backlog[wkr]);
+
+			else {
+				d->bufs[wkr].bufptr64 = next_value;
+				d->in_flight_tags[wkr] = new_tag;
+				d->in_flight_bitmask |= (1UL << wkr);
+				next_mb = NULL;
+			}
+			oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+		} else if (data & RTE_DISTRIB_RETURN_BUF) {
+			handle_worker_shutdown(d, wkr);
+			oldbuf = data >> RTE_DISTRIB_FLAG_BITS;
+		}
+
+		/* store returns in a circular buffer */
+		store_return(oldbuf, d, &ret_start, &ret_count);
+
+		if (++wkr == d->num_workers)
+			wkr = 0;
+	}
+	/* to finish, check all workers for backlog and schedule work for them
+	 * if they are ready */
+	for (wkr = 0; wkr < d->num_workers; wkr++)
+		if (d->backlog[wkr].count &&
+				(d->bufs[wkr].bufptr64 & RTE_DISTRIB_GET_BUF)) {
+
+			int64_t oldbuf = d->bufs[wkr].bufptr64 >>
+					RTE_DISTRIB_FLAG_BITS;
+			store_return(oldbuf, d, &ret_start, &ret_count);
+
+			d->bufs[wkr].bufptr64 = backlog_pop(&d->backlog[wkr]);
+		}
+
+	d->returns.start = ret_start;
+	d->returns.count = ret_count;
+	return num_mbufs;
+}
+
+/* return to the caller, packets returned from workers */
+int
+rte_distributor_returned_pkts(struct rte_distributor *d,
+		struct rte_mbuf **mbufs, unsigned max_mbufs)
+{
+	struct rte_distributor_returned_pkts *returns = &d->returns;
+	unsigned retval = (max_mbufs < returns->count) ?
+			max_mbufs : returns->count;
+	unsigned i;
+
+	for (i = 0; i < retval; i++) {
+		unsigned idx = (returns->start + i) & RTE_DISTRIB_RETURNS_MASK;
+		mbufs[i] = returns->mbufs[idx];
+	}
+	returns->start += i;
+	returns->count -= i;
+
+	return retval;
+}
+
+/* return the number of packets in-flight in a distributor, i.e. packets
+ * being workered on or queued up in a backlog. */
+static inline unsigned
+total_outstanding(const struct rte_distributor *d)
+{
+	unsigned wkr, total_outstanding;
+
+	total_outstanding = __builtin_popcountl(d->in_flight_bitmask);
+
+	for (wkr = 0; wkr < d->num_workers; wkr++)
+		total_outstanding += d->backlog[wkr].count;
+
+	return total_outstanding;
+}
+
+/* flush the distributor, so that there are no outstanding packets in flight or
+ * queued up. */
+int
+rte_distributor_flush(struct rte_distributor *d)
+{
+	const unsigned flushed = total_outstanding(d);
+
+	while (total_outstanding(d) > 0)
+		rte_distributor_process(d, NULL, 0);
+
+	return flushed;
+}
+
+/* clears the internal returns array in the distributor */
+void
+rte_distributor_clear_returns(struct rte_distributor *d)
+{
+	d->returns.start = d->returns.count = 0;
+#ifndef __OPTIMIZE__
+	memset(d->returns.mbufs, 0, sizeof(d->returns.mbufs));
+#endif
+}
+
+/* creates a distributor instance */
+struct rte_distributor *
+rte_distributor_create(const char *name,
+		unsigned socket_id,
+		unsigned num_workers)
+{
+	struct rte_distributor *d;
+	struct rte_distributor_list *distributor_list;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+
+	/* compilation-time checks */
+	RTE_BUILD_BUG_ON((sizeof(*d) & RTE_CACHE_LINE_MASK) != 0);
+	RTE_BUILD_BUG_ON((RTE_DISTRIB_MAX_WORKERS & 7) != 0);
+	RTE_BUILD_BUG_ON(RTE_DISTRIB_MAX_WORKERS >
+				sizeof(d->in_flight_bitmask) * CHAR_BIT);
+
+	if (name == NULL || num_workers >= RTE_DISTRIB_MAX_WORKERS) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(mz_name, sizeof(mz_name), RTE_DISTRIB_PREFIX"%s", name);
+	mz = rte_memzone_reserve(mz_name, sizeof(*d), socket_id, NO_FLAGS);
+	if (mz == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	d = mz->addr;
+	snprintf(d->name, sizeof(d->name), "%s", name);
+	d->num_workers = num_workers;
+
+	distributor_list = RTE_TAILQ_CAST(rte_distributor_tailq.head,
+					  rte_distributor_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_INSERT_TAIL(distributor_list, d, next);
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return d;
+}
diff --git a/lib/librte_distributor/rte_distributor.h b/lib/librte_distributor/rte_distributor.h
new file mode 100644
index 00000000..7d36bc8a
--- /dev/null
+++ b/lib/librte_distributor/rte_distributor.h
@@ -0,0 +1,247 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DISTRIBUTE_H_
+#define _RTE_DISTRIBUTE_H_
+
+/**
+ * @file
+ * RTE distributor
+ *
+ * The distributor is a component which is designed to pass packets
+ * one-at-a-time to workers, with dynamic load balancing.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */
+
+struct rte_distributor;
+struct rte_mbuf;
+
+/**
+ * Function to create a new distributor instance
+ *
+ * Reserves the memory needed for the distributor operation and
+ * initializes the distributor to work with the configured number of workers.
+ *
+ * @param name
+ *   The name to be given to the distributor instance.
+ * @param socket_id
+ *   The NUMA node on which the memory is to be allocated
+ * @param num_workers
+ *   The maximum number of workers that will request packets from this
+ *   distributor
+ * @return
+ *   The newly created distributor instance
+ */
+struct rte_distributor *
+rte_distributor_create(const char *name, unsigned socket_id,
+		unsigned num_workers);
+
+/*  *** APIS to be called on the distributor lcore ***  */
+/*
+ * The following APIs are the public APIs which are designed for use on a
+ * single lcore which acts as the distributor lcore for a given distributor
+ * instance. These functions cannot be called on multiple cores simultaneously
+ * without using locking to protect access to the internals of the distributor.
+ *
+ * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore
+ * for the same distributor instance, otherwise deadlock will result.
+ */
+
+/**
+ * Process a set of packets by distributing them among workers that request
+ * packets. The distributor will ensure that no two packets that have the
+ * same flow id, or tag, in the mbuf will be procesed at the same time.
+ *
+ * The user is advocated to set tag for each mbuf before calling this function.
+ * If user doesn't set the tag, the tag value can be various values depending on
+ * driver implementation and configuration.
+ *
+ * This is not multi-thread safe and should only be called on a single lcore.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param mbufs
+ *   The mbufs to be distributed
+ * @param num_mbufs
+ *   The number of mbufs in the mbufs array
+ * @return
+ *   The number of mbufs processed.
+ */
+int
+rte_distributor_process(struct rte_distributor *d,
+		struct rte_mbuf **mbufs, unsigned num_mbufs);
+
+/**
+ * Get a set of mbufs that have been returned to the distributor by workers
+ *
+ * This should only be called on the same lcore as rte_distributor_process()
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param mbufs
+ *   The mbufs pointer array to be filled in
+ * @param max_mbufs
+ *   The size of the mbufs array
+ * @return
+ *   The number of mbufs returned in the mbufs array.
+ */
+int
+rte_distributor_returned_pkts(struct rte_distributor *d,
+		struct rte_mbuf **mbufs, unsigned max_mbufs);
+
+/**
+ * Flush the distributor component, so that there are no in-flight or
+ * backlogged packets awaiting processing
+ *
+ * This should only be called on the same lcore as rte_distributor_process()
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @return
+ *   The number of queued/in-flight packets that were completed by this call.
+ */
+int
+rte_distributor_flush(struct rte_distributor *d);
+
+/**
+ * Clears the array of returned packets used as the source for the
+ * rte_distributor_returned_pkts() API call.
+ *
+ * This should only be called on the same lcore as rte_distributor_process()
+ *
+ * @param d
+ *   The distributor instance to be used
+ */
+void
+rte_distributor_clear_returns(struct rte_distributor *d);
+
+/*  *** APIS to be called on the worker lcores ***  */
+/*
+ * The following APIs are the public APIs which are designed for use on
+ * multiple lcores which act as workers for a distributor. Each lcore should use
+ * a unique worker id when requesting packets.
+ *
+ * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore
+ * for the same distributor instance, otherwise deadlock will result.
+ */
+
+/**
+ * API called by a worker to get a new packet to process. Any previous packet
+ * given to the worker is assumed to have completed processing, and may be
+ * optionally returned to the distributor via the oldpkt parameter.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less that num_workers passed
+ *   at distributor creation time.
+ * @param oldpkt
+ *   The previous packet, if any, being processed by the worker
+ *
+ * @return
+ *   A new packet to be processed by the worker thread.
+ */
+struct rte_mbuf *
+rte_distributor_get_pkt(struct rte_distributor *d,
+		unsigned worker_id, struct rte_mbuf *oldpkt);
+
+/**
+ * API called by a worker to return a completed packet without requesting a
+ * new packet, for example, because a worker thread is shutting down
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less that num_workers passed
+ *   at distributor creation time.
+ * @param mbuf
+ *   The previous packet being processed by the worker
+ */
+int
+rte_distributor_return_pkt(struct rte_distributor *d, unsigned worker_id,
+		struct rte_mbuf *mbuf);
+
+/**
+ * API called by a worker to request a new packet to process.
+ * Any previous packet given to the worker is assumed to have completed
+ * processing, and may be optionally returned to the distributor via
+ * the oldpkt parameter.
+ * Unlike rte_distributor_get_pkt(), this function does not wait for a new
+ * packet to be provided by the distributor.
+ *
+ * NOTE: after calling this function, rte_distributor_poll_pkt() should
+ * be used to poll for the packet requested. The rte_distributor_get_pkt()
+ * API should *not* be used to try and retrieve the new packet.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less that num_workers passed
+ *   at distributor creation time.
+ * @param oldpkt
+ *   The previous packet, if any, being processed by the worker
+ */
+void
+rte_distributor_request_pkt(struct rte_distributor *d,
+		unsigned worker_id, struct rte_mbuf *oldpkt);
+
+/**
+ * API called by a worker to check for a new packet that was previously
+ * requested by a call to rte_distributor_request_pkt(). It does not wait
+ * for the new packet to be available, but returns NULL if the request has
+ * not yet been fulfilled by the distributor.
+ *
+ * @param d
+ *   The distributor instance to be used
+ * @param worker_id
+ *   The worker instance number to use - must be less that num_workers passed
+ *   at distributor creation time.
+ *
+ * @return
+ *   A new packet to be processed by the worker thread, or NULL if no
+ *   packet is yet available.
+ */
+struct rte_mbuf *
+rte_distributor_poll_pkt(struct rte_distributor *d,
+		unsigned worker_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_distributor/rte_distributor_version.map b/lib/librte_distributor/rte_distributor_version.map
new file mode 100644
index 00000000..73fdc437
--- /dev/null
+++ b/lib/librte_distributor/rte_distributor_version.map
@@ -0,0 +1,15 @@
+DPDK_2.0 {
+	global:
+
+	rte_distributor_clear_returns;
+	rte_distributor_create;
+	rte_distributor_flush;
+	rte_distributor_get_pkt;
+	rte_distributor_poll_pkt;
+	rte_distributor_process;
+	rte_distributor_request_pkt;
+	rte_distributor_return_pkt;
+	rte_distributor_returned_pkts;
+
+	local: *;
+};
diff --git a/lib/librte_eal/Makefile b/lib/librte_eal/Makefile
new file mode 100644
index 00000000..cf11a099
--- /dev/null
+++ b/lib/librte_eal/Makefile
@@ -0,0 +1,38 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-y += common
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += linuxapp
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += bsdapp
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile
new file mode 100644
index 00000000..0e6e2be9
--- /dev/null
+++ b/lib/librte_eal/bsdapp/Makefile
@@ -0,0 +1,38 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem
+DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile
new file mode 100644
index 00000000..f64374c6
--- /dev/null
+++ b/lib/librte_eal/bsdapp/contigmem/BSDmakefile
@@ -0,0 +1,36 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+KMOD=	contigmem
+SRCS=	contigmem.c device_if.h bus_if.h
+
+.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile
new file mode 100644
index 00000000..bab005fd
--- /dev/null
+++ b/lib/librte_eal/bsdapp/contigmem/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = contigmem
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := contigmem.c
+
+include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
new file mode 100644
index 00000000..c6ca3b9c
--- /dev/null
+++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c
@@ -0,0 +1,232 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <machine/bus.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+static int              contigmem_load(void);
+static int              contigmem_unload(void);
+static int              contigmem_physaddr(SYSCTL_HANDLER_ARGS);
+
+static d_mmap_t         contigmem_mmap;
+static d_mmap_single_t  contigmem_mmap_single;
+static d_open_t         contigmem_open;
+
+static int              contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
+static int64_t          contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
+
+static eventhandler_tag contigmem_eh_tag;
+static void            *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
+static struct cdev     *contigmem_cdev = NULL;
+
+TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
+TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
+
+static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
+
+SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
+	&contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
+SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
+	&contigmem_buffer_size, 0, "Size of each contiguous buffer");
+
+static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
+	"physaddr");
+
+MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");
+
+static int contigmem_modevent(module_t mod, int type, void *arg)
+{
+	int error = 0;
+
+	switch (type) {
+	case MOD_LOAD:
+		error = contigmem_load();
+		break;
+	case MOD_UNLOAD:
+		error = contigmem_unload();
+		break;
+	default:
+		break;
+	}
+
+	return error;
+}
+
+moduledata_t contigmem_mod = {
+	"contigmem",
+	(modeventhand_t)contigmem_modevent,
+	0
+};
+
+DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
+MODULE_VERSION(contigmem, 1);
+
+static struct cdevsw contigmem_ops = {
+	.d_name         = "contigmem",
+	.d_version      = D_VERSION,
+	.d_mmap         = contigmem_mmap,
+	.d_mmap_single  = contigmem_mmap_single,
+	.d_open         = contigmem_open,
+};
+
+static int
+contigmem_load()
+{
+	char index_string[8], description[32];
+	int  i;
+
+	if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
+		printf("%d buffers requested is greater than %d allowed\n",
+				contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS);
+		return EINVAL;
+	}
+
+	if (contigmem_buffer_size < PAGE_SIZE ||
+			(contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) {
+		printf("buffer size 0x%lx is not greater than PAGE_SIZE and "
+				"power of two\n", contigmem_buffer_size);
+		return EINVAL;
+	}
+
+	for (i = 0; i < contigmem_num_buffers; i++) {
+		contigmem_buffers[i] =
+				contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0,
+			BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
+
+		if (contigmem_buffers[i] == NULL) {
+			printf("contigmalloc failed for buffer %d\n", i);
+			return ENOMEM;
+		}
+
+		printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i],
+				(void *)pmap_kextract((vm_offset_t)contigmem_buffers[i]));
+
+		snprintf(index_string, sizeof(index_string), "%d", i);
+		snprintf(description, sizeof(description),
+				"phys addr for buffer %d", i);
+		SYSCTL_ADD_PROC(NULL,
+				&SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
+				index_string, CTLTYPE_U64 | CTLFLAG_RD,
+				(void *)(uintptr_t)i, 0, contigmem_physaddr, "LU",
+				description);
+	}
+
+	contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT,
+			GID_WHEEL, 0600, "contigmem");
+
+	return 0;
+}
+
+static int
+contigmem_unload()
+{
+	int i;
+
+	if (contigmem_cdev != NULL)
+		destroy_dev(contigmem_cdev);
+
+	if (contigmem_eh_tag != NULL)
+		EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
+
+	for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++)
+		if (contigmem_buffers[i] != NULL)
+			contigfree(contigmem_buffers[i], contigmem_buffer_size,
+					M_CONTIGMEM);
+
+	return 0;
+}
+
+static int
+contigmem_physaddr(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t	physaddr;
+	int		index = (int)(uintptr_t)arg1;
+
+	physaddr = (uint64_t)vtophys(contigmem_buffers[index]);
+	return sysctl_handle_64(oidp, &physaddr, 0, req);
+}
+
+static int
+contigmem_open(struct cdev *cdev, int fflags, int devtype,
+		struct thread *td)
+{
+	return 0;
+}
+
+static int
+contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+		int prot, vm_memattr_t *memattr)
+{
+
+	*paddr = offset;
+	return 0;
+}
+
+static int
+contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+		struct vm_object **obj, int nprot)
+{
+	/*
+	 * The buffer index is encoded in the offset.  Divide the offset by
+	 *  PAGE_SIZE to get the index of the buffer requested by the user
+	 *  app.
+	 */
+	if ((*offset/PAGE_SIZE) >= contigmem_num_buffers)
+		return EINVAL;
+
+	*offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]);
+	*obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
+			curthread->td_ucred);
+
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
new file mode 100644
index 00000000..9054ad61
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -0,0 +1,117 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+ARCH_DIR ?= $(RTE_ARCH)
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_ring
+CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
+CFLAGS += $(WERROR_FLAGS) -O3
+
+LDLIBS += -lexecinfo
+LDLIBS += -lpthread
+LDLIBS += -lgcc_s
+
+EXPORT_MAP := rte_eal_version.map
+
+LIBABIVER := 2
+
+# specific to linuxapp exec-env
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c
+
+# from common dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
+
+# from arch dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c
+
+CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
+
+CFLAGS_eal.o := -D_GNU_SOURCE
+#CFLAGS_eal_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_log.o := -D_GNU_SOURCE
+CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+CFLAGS_eal_hpet.o += -Wno-return-type
+endif
+
+INC := rte_interrupts.h
+
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \
+	$(addprefix include/exec-env/,$(INC))
+
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common/arch/$(ARCH_DIR)
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
new file mode 100644
index 00000000..06bfd4e0
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -0,0 +1,638 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_common.h>
+#include <rte_version.h>
+#include <rte_atomic.h>
+#include <malloc_heap.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+/* Allow the application to print its usage message too if set */
+static rte_usage_hook_t	rte_application_usage_hook = NULL;
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offsetof(struct rte_mem_config, memseg),
+		.l_len = sizeof(early_mem_config.memseg),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+		.mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* used by rte_rdtsc() */
+int rte_cycles_vmware_tsc_map;
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+	return &rte_config;
+}
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	if ((f = fopen(filename, "r")) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+			__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+			__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	*val = strtoul(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return 0;
+}
+
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static void
+rte_eal_config_create(void)
+{
+	void *rte_mem_cfg_addr;
+	int retval;
+
+	const char *pathname = eal_runtime_config_path();
+
+	if (internal_config.no_shconf)
+		return;
+
+	if (mem_cfg_fd < 0){
+		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
+		if (mem_cfg_fd < 0)
+			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+	}
+
+	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+	if (retval < 0){
+		close(mem_cfg_fd);
+		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
+	}
+
+	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+	if (retval < 0){
+		close(mem_cfg_fd);
+		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
+				"process running?\n", pathname);
+	}
+
+	rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+
+	if (rte_mem_cfg_addr == MAP_FAILED){
+		rte_panic("Cannot mmap memory for rte_config\n");
+	}
+	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+}
+
+/* attach to an existing shared memory config */
+static void
+rte_eal_config_attach(void)
+{
+	void *rte_mem_cfg_addr;
+	const char *pathname = eal_runtime_config_path();
+
+	if (internal_config.no_shconf)
+		return;
+
+	if (mem_cfg_fd < 0){
+		mem_cfg_fd = open(pathname, O_RDWR);
+		if (mem_cfg_fd < 0)
+			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+	}
+
+	rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+	close(mem_cfg_fd);
+	if (rte_mem_cfg_addr == MAP_FAILED)
+		rte_panic("Cannot mmap memory for rte_config\n");
+
+	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+	const char *pathname = eal_runtime_config_path();
+
+	/* if we can open the file but not get a write-lock we are a secondary
+	 * process. NOTE: if we get a file handle back, we keep that open
+	 * and don't close it to prevent a race condition between multiple opens */
+	if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+			(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+		ptype = RTE_PROC_SECONDARY;
+
+	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+	return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static void
+rte_config_init(void)
+{
+	rte_config.process_type = internal_config.process_type;
+
+	switch (rte_config.process_type){
+	case RTE_PROC_PRIMARY:
+		rte_eal_config_create();
+		break;
+	case RTE_PROC_SECONDARY:
+		rte_eal_config_attach();
+		rte_eal_mcfg_wait_complete(rte_config.mem_config);
+		break;
+	case RTE_PROC_AUTO:
+	case RTE_PROC_INVALID:
+		rte_panic("Invalid process type\n");
+	}
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+	printf("\nUsage: %s ", prgname);
+	eal_common_usage();
+	/* Allow the application to print its usage message too if hook is set */
+	if ( rte_application_usage_hook ) {
+		printf("===== Application Usage =====\n\n");
+		rte_application_usage_hook(prgname);
+	}
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook( rte_usage_hook_t usage_func )
+{
+	rte_usage_hook_t	old_func;
+
+	/* Will be NULL on the first call to denote the last usage routine. */
+	old_func					= rte_application_usage_hook;
+	rte_application_usage_hook	= usage_func;
+
+	return old_func;
+}
+
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+	uint64_t size = 0;
+	unsigned i, j;
+
+	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+		if (hpi->hugedir != NULL) {
+			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+				size += hpi->hugepage_sz * hpi->num_pages[j];
+			}
+		}
+	}
+
+	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+	int opt;
+	char **argvopt;
+	int option_index;
+	const int old_optind = optind;
+	const int old_optopt = optopt;
+	const int old_optreset = optreset;
+	char * const old_optarg = optarg;
+
+	argvopt = argv;
+	optind = 1;
+	optreset = 1;
+
+	eal_reset_internal_config(&internal_config);
+
+	while ((opt = getopt_long(argc, argvopt, eal_short_options,
+				  eal_long_options, &option_index)) != EOF) {
+
+		int ret;
+
+		/* getopt is not happy, stop right now */
+		if (opt == '?')
+			break;
+
+		ret = (opt == OPT_LOG_LEVEL_NUM) ?
+			eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
+		/* common parser is not happy */
+		if (ret < 0)
+			break;
+	}
+
+	/* restore getopt lib */
+	optind = old_optind;
+	optopt = old_optopt;
+	optreset = old_optreset;
+	optarg = old_optarg;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+	int opt, ret;
+	char **argvopt;
+	int option_index;
+	char *prgname = argv[0];
+	const int old_optind = optind;
+	const int old_optopt = optopt;
+	const int old_optreset = optreset;
+	char * const old_optarg = optarg;
+
+	argvopt = argv;
+	optind = 1;
+	optreset = 1;
+
+	while ((opt = getopt_long(argc, argvopt, eal_short_options,
+				  eal_long_options, &option_index)) != EOF) {
+
+		/* getopt is not happy, stop right now */
+		if (opt == '?') {
+			eal_usage(prgname);
+			ret = -1;
+			goto out;
+		}
+
+		ret = eal_parse_common_option(opt, optarg, &internal_config);
+		/* common parser is not happy */
+		if (ret < 0) {
+			eal_usage(prgname);
+			ret = -1;
+			goto out;
+		}
+		/* common parser handled this option */
+		if (ret == 0)
+			continue;
+
+		switch (opt) {
+		case 'h':
+			eal_usage(prgname);
+			exit(EXIT_SUCCESS);
+		default:
+			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+				RTE_LOG(ERR, EAL, "Option %c is not supported "
+					"on FreeBSD\n", opt);
+			} else if (opt >= OPT_LONG_MIN_NUM &&
+				   opt < OPT_LONG_MAX_NUM) {
+				RTE_LOG(ERR, EAL, "Option %s is not supported "
+					"on FreeBSD\n",
+					eal_long_options[option_index].name);
+			} else {
+				RTE_LOG(ERR, EAL, "Option %d is not supported "
+					"on FreeBSD\n", opt);
+			}
+			eal_usage(prgname);
+			ret = -1;
+			goto out;
+		}
+	}
+
+	if (eal_adjust_config(&internal_config) != 0) {
+		ret = -1;
+		goto out;
+	}
+
+	/* sanity checks */
+	if (eal_check_common_options(&internal_config) != 0) {
+		eal_usage(prgname);
+		ret = -1;
+		goto out;
+	}
+
+	if (optind >= 0)
+		argv[optind-1] = prgname;
+	ret = optind-1;
+
+out:
+	/* restore getopt lib */
+	optind = old_optind;
+	optopt = old_optopt;
+	optreset = old_optreset;
+	optarg = old_optarg;
+
+	return ret;
+}
+
+static void
+eal_check_mem_on_local_socket(void)
+{
+	const struct rte_memseg *ms;
+	int i, socket_id;
+
+	socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
+
+	ms = rte_eal_get_physmem_layout();
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++)
+		if (ms[i].socket_id == socket_id &&
+				ms[i].len > 0)
+			return;
+
+	RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
+			"memory on local socket!\n");
+}
+
+static int
+sync_func(__attribute__((unused)) void *arg)
+{
+	return 0;
+}
+
+inline static void
+rte_eal_mcfg_complete(void)
+{
+	/* ALL shared mem_config related INIT DONE */
+	if (rte_config.process_type == RTE_PROC_PRIMARY)
+		rte_config.mem_config->magic = RTE_MAGIC;
+}
+
+/* return non-zero if hugepages are enabled. */
+int rte_eal_has_hugepages(void)
+{
+	return !internal_config.no_hugetlbfs;
+}
+
+/* Abstraction for port I/0 privilege */
+int
+rte_eal_iopl_init(void)
+{
+	static int fd;
+
+	fd = open("/dev/io", O_RDWR);
+	if (fd < 0)
+		return -1;
+	/* keep fd open for iopl */
+	return 0;
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+	int i, fctret, ret;
+	pthread_t thread_id;
+	static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	if (!rte_atomic32_test_and_set(&run_once))
+		return -1;
+
+	thread_id = pthread_self();
+
+	if (rte_eal_log_early_init() < 0)
+		rte_panic("Cannot init early logs\n");
+
+	eal_log_level_parse(argc, argv);
+
+	/* set log level as early as possible */
+	rte_set_log_level(internal_config.log_level);
+
+	if (rte_eal_cpu_init() < 0)
+		rte_panic("Cannot detect lcores\n");
+
+	fctret = eal_parse_args(argc, argv);
+	if (fctret < 0)
+		exit(1);
+
+	if (internal_config.no_hugetlbfs == 0 &&
+			internal_config.process_type != RTE_PROC_SECONDARY &&
+			eal_hugepage_info_init() < 0)
+		rte_panic("Cannot get hugepage information\n");
+
+	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
+		if (internal_config.no_hugetlbfs)
+			internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+		else
+			internal_config.memory = eal_get_hugepage_mem_size();
+	}
+
+	if (internal_config.vmware_tsc_map == 1) {
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+		rte_cycles_vmware_tsc_map = 1;
+		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
+				"you must have monitor_control.pseudo_perfctr = TRUE\n");
+#else
+		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
+				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
+#endif
+	}
+
+	rte_srand(rte_rdtsc());
+
+	rte_config_init();
+
+	if (rte_eal_memory_init() < 0)
+		rte_panic("Cannot init memory\n");
+
+	if (rte_eal_memzone_init() < 0)
+		rte_panic("Cannot init memzone\n");
+
+	if (rte_eal_tailqs_init() < 0)
+		rte_panic("Cannot init tail queues for objects\n");
+
+/*	if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
+		rte_panic("Cannot init logs\n");*/
+
+	if (rte_eal_alarm_init() < 0)
+		rte_panic("Cannot init interrupt-handling thread\n");
+
+	if (rte_eal_intr_init() < 0)
+		rte_panic("Cannot init interrupt-handling thread\n");
+
+	if (rte_eal_timer_init() < 0)
+		rte_panic("Cannot init HPET or TSC timers\n");
+
+	if (rte_eal_pci_init() < 0)
+		rte_panic("Cannot init PCI\n");
+
+	eal_check_mem_on_local_socket();
+
+	if (eal_plugins_init() < 0)
+		rte_panic("Cannot init plugins\n");
+
+	eal_thread_init_master(rte_config.master_lcore);
+
+	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
+		rte_config.master_lcore, thread_id, cpuset,
+		ret == 0 ? "" : "...");
+
+	if (rte_eal_dev_init() < 0)
+		rte_panic("Cannot init pmd devices\n");
+
+	RTE_LCORE_FOREACH_SLAVE(i) {
+
+		/*
+		 * create communication pipes between master thread
+		 * and children
+		 */
+		if (pipe(lcore_config[i].pipe_master2slave) < 0)
+			rte_panic("Cannot create pipe\n");
+		if (pipe(lcore_config[i].pipe_slave2master) < 0)
+			rte_panic("Cannot create pipe\n");
+
+		lcore_config[i].state = WAIT;
+
+		/* create a thread for each lcore */
+		ret = pthread_create(&lcore_config[i].thread_id, NULL,
+				     eal_thread_loop, NULL);
+		if (ret != 0)
+			rte_panic("Cannot create thread\n");
+
+		/* Set thread_name for aid in debugging. */
+		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+				"lcore-slave-%d", i);
+		pthread_set_name_np(lcore_config[i].thread_id, thread_name);
+	}
+
+	/*
+	 * Launch a dummy function on all slave lcores, so that master lcore
+	 * knows they are all ready when this function returns.
+	 */
+	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+	rte_eal_mp_wait_lcore();
+
+	/* Probe & Initialize PCI devices */
+	if (rte_eal_pci_probe())
+		rte_panic("Cannot probe PCI\n");
+
+	rte_eal_mcfg_complete();
+
+	return fctret;
+}
+
+/* get core role */
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned lcore_id)
+{
+	return rte_config.lcore_role[lcore_id];
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+	return rte_config.process_type;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
new file mode 100644
index 00000000..204df85d
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -0,0 +1,60 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include "eal_private.h"
+
+int
+rte_eal_alarm_init(void)
+{
+	return 0;
+}
+
+
+int
+rte_eal_alarm_set(uint64_t us __rte_unused,
+		rte_eal_alarm_callback cb_fn __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	return -ENOTSUP;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c
new file mode 100644
index 00000000..907fbfa7
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_debug.c
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <execinfo.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+	void *func[BACKTRACE_SIZE];
+	char **symb = NULL;
+	int size;
+
+	size = backtrace(func, BACKTRACE_SIZE);
+	symb = backtrace_symbols(func, size);
+
+	if (symb == NULL)
+		return;
+
+	while (size > 0) {
+		rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+			"%d: [%s]\n", size, symb[size - 1]);
+		size --;
+	}
+
+	free(symb);
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+	return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+	va_list ap;
+
+	/* disable history */
+	rte_log_set_history(0);
+
+	rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+	va_start(ap, format);
+	rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+	va_end(ap);
+	rte_dump_stack();
+	rte_dump_registers();
+	abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+	va_list ap;
+
+	/* disable history */
+	rte_log_set_history(0);
+
+	if (exit_code != 0)
+		RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
+				"  Cause: ", exit_code);
+
+	va_start(ap, format);
+	rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+	va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+	exit(exit_code);
+#else
+	rte_dump_stack();
+	rte_dump_registers();
+	abort();
+#endif
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
new file mode 100644
index 00000000..8a33c30c
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -0,0 +1,134 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <fcntl.h>
+#include "eal_hugepages.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+
+#define CONTIGMEM_DEV "/dev/contigmem"
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+	void *retval;
+	int fd = open(filename, O_CREAT | O_RDWR, 0666);
+	if (fd < 0)
+		return NULL;
+	if (ftruncate(fd, mem_size) < 0) {
+		close(fd);
+		return NULL;
+	}
+	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	close(fd);
+	return retval;
+}
+
+/*
+ * No hugepage support on freebsd, but we dummy it, using contigmem driver
+ */
+int
+eal_hugepage_info_init(void)
+{
+	size_t sysctl_size;
+	int num_buffers, fd, error;
+	int64_t buffer_size;
+	/* re-use the linux "internal config" structure for our memory data */
+	struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+	struct hugepage_info *tmp_hpi;
+
+	sysctl_size = sizeof(num_buffers);
+	error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers,
+			&sysctl_size, NULL, 0);
+
+	if (error != 0) {
+		RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers");
+		return -1;
+	}
+
+	sysctl_size = sizeof(buffer_size);
+	error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size,
+			&sysctl_size, NULL, 0);
+
+	if (error != 0) {
+		RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size");
+		return -1;
+	}
+
+	fd = open(CONTIGMEM_DEV, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "could not open "CONTIGMEM_DEV"\n");
+		return -1;
+	}
+
+	if (buffer_size >= 1<<30)
+		RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dGB\n",
+				num_buffers, (int)(buffer_size>>30));
+	else if (buffer_size >= 1<<20)
+		RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dMB\n",
+				num_buffers, (int)(buffer_size>>20));
+	else
+		RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
+				num_buffers, (int)(buffer_size>>10));
+
+	internal_config.num_hugepage_sizes = 1;
+	hpi->hugedir = CONTIGMEM_DEV;
+	hpi->hugepage_sz = buffer_size;
+	hpi->num_pages[0] = num_buffers;
+	hpi->lock_descriptor = fd;
+
+	tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
+					sizeof(struct hugepage_info));
+	if (tmp_hpi == NULL ) {
+		RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+		return -1;
+	}
+
+	memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info));
+
+	if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) {
+		RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
new file mode 100644
index 00000000..836e4836
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include "eal_private.h"
+
+int
+rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused,
+			rte_intr_callback_fn cb __rte_unused,
+			void *cb_arg __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+int
+rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused,
+			rte_intr_callback_fn cb_fn __rte_unused,
+			void *cb_arg __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+int
+rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+int
+rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+int
+rte_eal_intr_init(void)
+{
+	return 0;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+		int epfd, int op, unsigned int vec, void *data)
+{
+	RTE_SET_USED(intr_handle);
+	RTE_SET_USED(epfd);
+	RTE_SET_USED(op);
+	RTE_SET_USED(vec);
+	RTE_SET_USED(data);
+
+	return -ENOTSUP;
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+	RTE_SET_USED(intr_handle);
+	RTE_SET_USED(nb_efd);
+
+	return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+	return 0;
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+	return 1;
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+	RTE_SET_USED(intr_handle);
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_lcore.c b/lib/librte_eal/bsdapp/eal/eal_lcore.c
new file mode 100644
index 00000000..b8bfafde
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c
@@ -0,0 +1,79 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/sysctl.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+/* No topology information available on FreeBSD including NUMA info */
+unsigned
+eal_cpu_core_id(__rte_unused unsigned lcore_id)
+{
+	return 0;
+}
+
+static int
+eal_get_ncpus(void)
+{
+	int mib[2] = {CTL_HW, HW_NCPU};
+	int ncpu;
+	size_t len = sizeof(ncpu);
+
+	sysctl(mib, 2, &ncpu, &len, NULL, 0);
+	RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu);
+	return ncpu;
+}
+
+unsigned
+eal_cpu_socket_id(__rte_unused unsigned cpu_id)
+{
+	return 0;
+}
+
+/* Check if a cpu is present by the presence of the
+ * cpu information for it.
+ */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+	const unsigned ncpus = eal_get_ncpus();
+	return lcore_id < ncpus;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_eal/bsdapp/eal/eal_log.c
new file mode 100644
index 00000000..a425f7a8
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_log.c
@@ -0,0 +1,57 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include <eal_private.h>
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+int
+rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused)
+{
+	if (rte_eal_common_log_init(stderr) < 0)
+		return -1;
+	return 0;
+}
+
+int
+rte_eal_log_early_init(void)
+{
+	rte_openlog_stream(stderr);
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
new file mode 100644
index 00000000..3614da8d
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -0,0 +1,194 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <inttypes.h>
+#include <fcntl.h>
+
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+
+#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+	/* XXX not implemented. This function is only used by
+	 * rte_mempool_virt2phy() when hugepages are disabled. */
+	(void)virtaddr;
+	return RTE_BAD_PHYS_ADDR;
+}
+
+int
+rte_eal_hugepage_init(void)
+{
+	struct rte_mem_config *mcfg;
+	uint64_t total_mem = 0;
+	void *addr;
+	unsigned i, j, seg_idx = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* for debug purposes, hugetlbfs can be disabled */
+	if (internal_config.no_hugetlbfs) {
+		addr = malloc(internal_config.memory);
+		mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+		mcfg->memseg[0].addr = addr;
+		mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
+		mcfg->memseg[0].len = internal_config.memory;
+		mcfg->memseg[0].socket_id = 0;
+		return 0;
+	}
+
+	/* map all hugepages and sort them */
+	for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
+		struct hugepage_info *hpi;
+
+		hpi = &internal_config.hugepage_info[i];
+		for (j = 0; j < hpi->num_pages[0]; j++) {
+			struct rte_memseg *seg;
+			uint64_t physaddr;
+			int error;
+			size_t sysctl_size = sizeof(physaddr);
+			char physaddr_str[64];
+
+			addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
+				    MAP_SHARED, hpi->lock_descriptor,
+				    j * EAL_PAGE_SIZE);
+			if (addr == MAP_FAILED) {
+				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+						j, hpi->hugedir);
+				return -1;
+			}
+
+			snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem"
+					".physaddr.%d", j);
+			error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size,
+					NULL, 0);
+			if (error < 0) {
+				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
+						"from %s\n", j, hpi->hugedir);
+				return -1;
+			}
+
+			seg = &mcfg->memseg[seg_idx++];
+			seg->addr = addr;
+			seg->phys_addr = physaddr;
+			seg->hugepage_sz = hpi->hugepage_sz;
+			seg->len = hpi->hugepage_sz;
+			seg->nchannel = mcfg->nchannel;
+			seg->nrank = mcfg->nrank;
+			seg->socket_id = 0;
+
+			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
+					PRIx64", len %zu\n",
+					seg_idx, addr, physaddr, hpi->hugepage_sz);
+			if (total_mem >= internal_config.memory ||
+					seg_idx >= RTE_MAX_MEMSEG)
+				break;
+		}
+	}
+	return 0;
+}
+
+int
+rte_eal_hugepage_attach(void)
+{
+	const struct hugepage_info *hpi;
+	int fd_hugepage_info, fd_hugepage = -1;
+	unsigned i = 0;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* Obtain a file descriptor for hugepage_info */
+	fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY);
+	if (fd_hugepage_info < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+		return -1;
+	}
+
+	/* Map the shared hugepage_info into the process address spaces */
+	hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE,
+			fd_hugepage_info, 0);
+	if (hpi == NULL) {
+		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+		goto error;
+	}
+
+	/* Obtain a file descriptor for contiguous memory */
+	fd_hugepage = open(hpi->hugedir, O_RDWR);
+	if (fd_hugepage < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir);
+		goto error;
+	}
+
+	/* Map the contiguous memory into each memory segment */
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+
+		void *addr;
+		struct rte_memseg *seg = &mcfg->memseg[i];
+
+		addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
+			    MAP_SHARED|MAP_FIXED, fd_hugepage,
+			    i * EAL_PAGE_SIZE);
+		if (addr == MAP_FAILED || addr != seg->addr) {
+			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+				i, hpi->hugedir);
+			goto error;
+		}
+
+	}
+
+	/* hugepage_info is no longer required */
+	munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info));
+	close(fd_hugepage_info);
+	close(fd_hugepage);
+	return 0;
+
+error:
+	if (fd_hugepage_info >= 0)
+		close(fd_hugepage_info);
+	if (fd_hugepage >= 0)
+		close(fd_hugepage);
+	return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
new file mode 100644
index 00000000..2d16d782
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -0,0 +1,633 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <dirent.h>
+#include <limits.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/pciio.h>
+#include <dev/pci/pcireg.h>
+
+#if defined(RTE_ARCH_X86)
+#include <sys/types.h>
+#include <machine/cpufunc.h>
+#endif
+
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_common.h>
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+#include <rte_devargs.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+/**
+ * @file
+ * PCI probing under linux
+ *
+ * This code is used to simulate a PCI probe by parsing information in
+ * sysfs. Moreover, when a registered driver matches a device, the
+ * kernel driver currently using it is unloaded and replaced by
+ * igb_uio module, which is a very minimal userland driver for Intel
+ * network card, only providing access to PCI BAR to applications, and
+ * enabling bus master.
+ */
+
+/* unbind kernel driver for this device */
+int
+pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused)
+{
+	RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented "
+		"for BSD\n");
+	return -ENOTSUP;
+}
+
+/* Map pci device */
+int
+rte_eal_pci_map_device(struct rte_pci_device *dev)
+{
+	int ret = -1;
+
+	/* try mapping the NIC resources */
+	switch (dev->kdrv) {
+	case RTE_KDRV_NIC_UIO:
+		/* map resources for devices that use uio */
+		ret = pci_uio_map_resource(dev);
+		break;
+	default:
+		RTE_LOG(DEBUG, EAL,
+			"  Not managed by a supported kernel driver, skipped\n");
+		ret = 1;
+		break;
+	}
+
+	return ret;
+}
+
+/* Unmap pci device */
+void
+rte_eal_pci_unmap_device(struct rte_pci_device *dev)
+{
+	/* try unmapping the NIC resources */
+	switch (dev->kdrv) {
+	case RTE_KDRV_NIC_UIO:
+		/* unmap resources for devices that use uio */
+		pci_uio_unmap_resource(dev);
+		break;
+	default:
+		RTE_LOG(DEBUG, EAL,
+			"  Not managed by a supported kernel driver, skipped\n");
+		break;
+	}
+}
+
+void
+pci_uio_free_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource *uio_res)
+{
+	rte_free(uio_res);
+
+	if (dev->intr_handle.fd) {
+		close(dev->intr_handle.fd);
+		dev->intr_handle.fd = -1;
+		dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	}
+}
+
+int
+pci_uio_alloc_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource **uio_res)
+{
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	struct rte_pci_addr *loc;
+
+	loc = &dev->addr;
+
+	snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u",
+			dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+	if (access(devname, O_RDWR) < 0) {
+		RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
+				"skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
+		return 1;
+	}
+
+	/* save fd if in primary process */
+	dev->intr_handle.fd = open(devname, O_RDWR);
+	if (dev->intr_handle.fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			devname, strerror(errno));
+		goto error;
+	}
+	dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+
+	/* allocate the mapping details for secondary processes*/
+	*uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
+	if (*uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot store uio mmap details\n", __func__);
+		goto error;
+	}
+
+	snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
+	memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
+
+	return 0;
+
+error:
+	pci_uio_free_resource(dev, *uio_res);
+	return -1;
+}
+
+int
+pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+		struct mapped_pci_resource *uio_res, int map_idx)
+{
+	int fd;
+	char *devname;
+	void *mapaddr;
+	uint64_t offset;
+	uint64_t pagesz;
+	struct pci_map *maps;
+
+	maps = uio_res->maps;
+	devname = uio_res->path;
+	pagesz = sysconf(_SC_PAGESIZE);
+
+	/* allocate memory to keep path */
+	maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
+	if (maps[map_idx].path == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
+				strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * open resource file, to mmap it
+	 */
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				devname, strerror(errno));
+		goto error;
+	}
+
+	/* if matching map is found, then use it */
+	offset = res_idx * pagesz;
+	mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
+			(size_t)dev->mem_resource[res_idx].len, 0);
+	close(fd);
+	if (mapaddr == MAP_FAILED)
+		goto error;
+
+	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
+	maps[map_idx].size = dev->mem_resource[res_idx].len;
+	maps[map_idx].addr = mapaddr;
+	maps[map_idx].offset = offset;
+	strcpy(maps[map_idx].path, devname);
+	dev->mem_resource[res_idx].addr = mapaddr;
+
+	return 0;
+
+error:
+	rte_free(maps[map_idx].path);
+	return -1;
+}
+
+static int
+pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
+{
+	struct rte_pci_device *dev;
+	struct pci_bar_io bar;
+	unsigned i, max;
+
+	dev = malloc(sizeof(*dev));
+	if (dev == NULL) {
+		return -1;
+	}
+
+	memset(dev, 0, sizeof(*dev));
+	dev->addr.domain = conf->pc_sel.pc_domain;
+	dev->addr.bus = conf->pc_sel.pc_bus;
+	dev->addr.devid = conf->pc_sel.pc_dev;
+	dev->addr.function = conf->pc_sel.pc_func;
+
+	/* get vendor id */
+	dev->id.vendor_id = conf->pc_vendor;
+
+	/* get device id */
+	dev->id.device_id = conf->pc_device;
+
+	/* get subsystem_vendor id */
+	dev->id.subsystem_vendor_id = conf->pc_subvendor;
+
+	/* get subsystem_device id */
+	dev->id.subsystem_device_id = conf->pc_subdevice;
+
+	/* TODO: get max_vfs */
+	dev->max_vfs = 0;
+
+	/* FreeBSD has no NUMA support (yet) */
+	dev->numa_node = 0;
+
+	/* FreeBSD has only one pass through driver */
+	dev->kdrv = RTE_KDRV_NIC_UIO;
+
+	/* parse resources */
+	switch (conf->pc_hdr & PCIM_HDRTYPE) {
+	case PCIM_HDRTYPE_NORMAL:
+		max = PCIR_MAX_BAR_0;
+		break;
+	case PCIM_HDRTYPE_BRIDGE:
+		max = PCIR_MAX_BAR_1;
+		break;
+	case PCIM_HDRTYPE_CARDBUS:
+		max = PCIR_MAX_BAR_2;
+		break;
+	default:
+		goto skipdev;
+	}
+
+	for (i = 0; i <= max; i++) {
+		bar.pbi_sel = conf->pc_sel;
+		bar.pbi_reg = PCIR_BAR(i);
+		if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0)
+			continue;
+
+		dev->mem_resource[i].len = bar.pbi_length;
+		if (PCI_BAR_IO(bar.pbi_base)) {
+			dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf));
+			continue;
+		}
+		dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf);
+	}
+
+	/* device is valid, add in list (sorted) */
+	if (TAILQ_EMPTY(&pci_device_list)) {
+		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+	}
+	else {
+		struct rte_pci_device *dev2 = NULL;
+		int ret;
+
+		TAILQ_FOREACH(dev2, &pci_device_list, next) {
+			ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
+			if (ret > 0)
+				continue;
+			else if (ret < 0) {
+				TAILQ_INSERT_BEFORE(dev2, dev, next);
+				return 0;
+			} else { /* already registered */
+				dev2->kdrv = dev->kdrv;
+				dev2->max_vfs = dev->max_vfs;
+				memmove(dev2->mem_resource,
+					dev->mem_resource,
+					sizeof(dev->mem_resource));
+				free(dev);
+				return 0;
+			}
+		}
+		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+	}
+
+	return 0;
+
+skipdev:
+	free(dev);
+	return 0;
+}
+
+/*
+ * Scan the content of the PCI bus, and add the devices in the devices
+ * list. Call pci_scan_one() for each pci entry found.
+ */
+int
+rte_eal_pci_scan(void)
+{
+	int fd;
+	unsigned dev_count = 0;
+	struct pci_conf matches[16];
+	struct pci_conf_io conf_io = {
+			.pat_buf_len = 0,
+			.num_patterns = 0,
+			.patterns = NULL,
+			.match_buf_len = sizeof(matches),
+			.matches = &matches[0],
+	};
+
+	fd = open("/dev/pci", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+		goto error;
+	}
+
+	do {
+		unsigned i;
+		if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
+			RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
+					__func__, strerror(errno));
+			goto error;
+		}
+
+		for (i = 0; i < conf_io.num_matches; i++)
+			if (pci_scan_one(fd, &matches[i]) < 0)
+				goto error;
+
+		dev_count += conf_io.num_matches;
+	} while(conf_io.status == PCI_GETCONF_MORE_DEVS);
+
+	close(fd);
+
+	RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count);
+	return 0;
+
+error:
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* Read PCI config space. */
+int rte_eal_pci_read_config(const struct rte_pci_device *dev,
+			    void *buf, size_t len, off_t offset)
+{
+	int fd = -1;
+	struct pci_io pi = {
+		.pi_sel = {
+			.pc_domain = dev->addr.domain,
+			.pc_bus = dev->addr.bus,
+			.pc_dev = dev->addr.devid,
+			.pc_func = dev->addr.function,
+		},
+		.pi_reg = offset,
+		.pi_width = len,
+	};
+
+	if (len == 3 || len > sizeof(pi.pi_data)) {
+		RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
+		goto error;
+	}
+
+	fd = open("/dev/pci", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+		goto error;
+	}
+
+	if (ioctl(fd, PCIOCREAD, &pi) < 0)
+		goto error;
+	close(fd);
+
+	memcpy(buf, &pi.pi_data, len);
+	return 0;
+
+ error:
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* Write PCI config space. */
+int rte_eal_pci_write_config(const struct rte_pci_device *dev,
+			     const void *buf, size_t len, off_t offset)
+{
+	int fd = -1;
+
+	struct pci_io pi = {
+		.pi_sel = {
+			.pc_domain = dev->addr.domain,
+			.pc_bus = dev->addr.bus,
+			.pc_dev = dev->addr.devid,
+			.pc_func = dev->addr.function,
+		},
+		.pi_reg = offset,
+		.pi_data = *(const uint32_t *)buf,
+		.pi_width = len,
+	};
+
+	if (len == 3 || len > sizeof(pi.pi_data)) {
+		RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
+		goto error;
+	}
+
+	memcpy(&pi.pi_data, buf, len);
+
+	fd = open("/dev/pci", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+		goto error;
+	}
+
+	if (ioctl(fd, PCIOCWRITE, &pi) < 0)
+		goto error;
+
+	close(fd);
+	return 0;
+
+ error:
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+int
+rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
+		       struct rte_pci_ioport *p)
+{
+	int ret;
+
+	switch (dev->kdrv) {
+#if defined(RTE_ARCH_X86)
+	case RTE_KDRV_NIC_UIO:
+		if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) {
+			p->base = (uintptr_t)dev->mem_resource[bar].addr;
+			ret = 0;
+		} else
+			ret = -1;
+		break;
+#endif
+	default:
+		ret = -1;
+		break;
+	}
+
+	if (!ret)
+		p->dev = dev;
+
+	return ret;
+}
+
+static void
+pci_uio_ioport_read(struct rte_pci_ioport *p,
+		    void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+	uint8_t *d;
+	int size;
+	unsigned short reg = p->base + offset;
+
+	for (d = data; len > 0; d += size, reg += size, len -= size) {
+		if (len >= 4) {
+			size = 4;
+			*(uint32_t *)d = inl(reg);
+		} else if (len >= 2) {
+			size = 2;
+			*(uint16_t *)d = inw(reg);
+		} else {
+			size = 1;
+			*d = inb(reg);
+		}
+	}
+#else
+	RTE_SET_USED(p);
+	RTE_SET_USED(data);
+	RTE_SET_USED(len);
+	RTE_SET_USED(offset);
+#endif
+}
+
+void
+rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
+			void *data, size_t len, off_t offset)
+{
+	switch (p->dev->kdrv) {
+	case RTE_KDRV_NIC_UIO:
+		pci_uio_ioport_read(p, data, len, offset);
+		break;
+	default:
+		break;
+	}
+}
+
+static void
+pci_uio_ioport_write(struct rte_pci_ioport *p,
+		     const void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+	const uint8_t *s;
+	int size;
+	unsigned short reg = p->base + offset;
+
+	for (s = data; len > 0; s += size, reg += size, len -= size) {
+		if (len >= 4) {
+			size = 4;
+			outl(*(const uint32_t *)s, reg);
+		} else if (len >= 2) {
+			size = 2;
+			outw(*(const uint16_t *)s, reg);
+		} else {
+			size = 1;
+			outb(*s, reg);
+		}
+	}
+#else
+	RTE_SET_USED(p);
+	RTE_SET_USED(data);
+	RTE_SET_USED(len);
+	RTE_SET_USED(offset);
+#endif
+}
+
+void
+rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
+			 const void *data, size_t len, off_t offset)
+{
+	switch (p->dev->kdrv) {
+	case RTE_KDRV_NIC_UIO:
+		pci_uio_ioport_write(p, data, len, offset);
+		break;
+	default:
+		break;
+	}
+}
+
+int
+rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
+{
+	int ret;
+
+	switch (p->dev->kdrv) {
+#if defined(RTE_ARCH_X86)
+	case RTE_KDRV_NIC_UIO:
+		ret = 0;
+		break;
+#endif
+	default:
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+/* Init the PCI EAL subsystem */
+int
+rte_eal_pci_init(void)
+{
+	TAILQ_INIT(&pci_driver_list);
+	TAILQ_INIT(&pci_device_list);
+
+	/* for debug purposes, PCI can be disabled */
+	if (internal_config.no_pci)
+		return 0;
+
+	if (rte_eal_pci_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
+		return -1;
+	}
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
new file mode 100644
index 00000000..9a034373
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -0,0 +1,201 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sched.h>
+#include <pthread_np.h>
+#include <sys/queue.h>
+#include <sys/thr.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+	int n;
+	char c = 0;
+	int m2s = lcore_config[slave_id].pipe_master2slave[1];
+	int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+	if (lcore_config[slave_id].state != WAIT)
+		return -EBUSY;
+
+	lcore_config[slave_id].f = f;
+	lcore_config[slave_id].arg = arg;
+
+	/* send message */
+	n = 0;
+	while (n == 0 || (n < 0 && errno == EINTR))
+		n = write(m2s, &c, 1);
+	if (n < 0)
+		rte_panic("cannot write on configuration pipe\n");
+
+	/* wait ack */
+	do {
+		n = read(s2m, &c, 1);
+	} while (n < 0 && errno == EINTR);
+
+	if (n <= 0)
+		rte_panic("cannot read on configuration pipe\n");
+
+	return 0;
+}
+
+/* set affinity for current thread */
+static int
+eal_thread_set_affinity(void)
+{
+	unsigned lcore_id = rte_lcore_id();
+
+	/* acquire system unique id  */
+	rte_gettid();
+
+	/* update EAL thread core affinity */
+	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* set CPU affinity */
+	if (eal_thread_set_affinity() < 0)
+		rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__attribute__((noreturn)) void *
+eal_thread_loop(__attribute__((unused)) void *arg)
+{
+	char c;
+	int n, ret;
+	unsigned lcore_id;
+	pthread_t thread_id;
+	int m2s, s2m;
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+	thread_id = pthread_self();
+
+	/* retrieve our lcore_id from the configuration structure */
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (thread_id == lcore_config[lcore_id].thread_id)
+			break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		rte_panic("cannot retrieve lcore id\n");
+
+	m2s = lcore_config[lcore_id].pipe_master2slave[0];
+	s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* set CPU affinity */
+	if (eal_thread_set_affinity() < 0)
+		rte_panic("cannot set affinity\n");
+
+	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
+		lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
+
+	/* read on our pipe to get commands */
+	while (1) {
+		void *fct_arg;
+
+		/* wait command */
+		do {
+			n = read(m2s, &c, 1);
+		} while (n < 0 && errno == EINTR);
+
+		if (n <= 0)
+			rte_panic("cannot read on configuration pipe\n");
+
+		lcore_config[lcore_id].state = RUNNING;
+
+		/* send ack */
+		n = 0;
+		while (n == 0 || (n < 0 && errno == EINTR))
+			n = write(s2m, &c, 1);
+		if (n < 0)
+			rte_panic("cannot write on configuration pipe\n");
+
+		if (lcore_config[lcore_id].f == NULL)
+			rte_panic("NULL function pointer\n");
+
+		/* call the function and store the return value */
+		fct_arg = lcore_config[lcore_id].arg;
+		ret = lcore_config[lcore_id].f(fct_arg);
+		lcore_config[lcore_id].ret = ret;
+		rte_wmb();
+		lcore_config[lcore_id].state = FINISHED;
+	}
+
+	/* never reached */
+	/* pthread_exit(NULL); */
+	/* return NULL; */
+}
+
+/* require calling thread tid by gettid() */
+int rte_sys_gettid(void)
+{
+	long lwpid;
+	thr_self(&lwpid);
+	return (int)lwpid;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c
new file mode 100644
index 00000000..f12d9bd2
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_timer.c
@@ -0,0 +1,94 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+#ifdef RTE_LIBEAL_USE_HPET
+#warning HPET is not supported in FreeBSD
+#endif
+
+enum timer_source eal_timer_source = EAL_TIMER_TSC;
+
+uint64_t
+get_tsc_freq(void)
+{
+	size_t sz;
+	int tmp;
+	uint64_t tsc_hz;
+
+	sz = sizeof(tmp);
+	tmp = 0;
+
+	if (sysctlbyname("kern.timecounter.smp_tsc", &tmp, &sz, NULL, 0))
+		RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+	else if (tmp != 1)
+		RTE_LOG(WARNING, EAL, "TSC is not safe to use in SMP mode\n");
+
+	tmp = 0;
+
+	if (sysctlbyname("kern.timecounter.invariant_tsc", &tmp, &sz, NULL, 0))
+		RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+	else if (tmp != 1)
+		RTE_LOG(WARNING, EAL, "TSC is not invariant\n");
+
+	sz = sizeof(tsc_hz);
+	if (sysctlbyname("machdep.tsc_freq", &tsc_hz, &sz, NULL, 0)) {
+		RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+		return 0;
+	}
+
+	return tsc_hz;
+}
+
+int
+rte_eal_timer_init(void)
+{
+	set_tsc_freq();
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
new file mode 100644
index 00000000..99a33432
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
@@ -0,0 +1,107 @@
+/*-
+ *   This file is provided under a dual BSD/LGPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ *   Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2.1 of the GNU Lesser General Public License
+ *   as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_DOM0_COMMON_H_
+#define _RTE_DOM0_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+#define DOM0_NAME_MAX   256
+#define DOM0_MM_DEV   "/dev/dom0_mm"
+
+#define DOM0_CONTIG_NUM_ORDER       9       /**< 2M order */
+#define DOM0_NUM_MEMSEG             512     /**< Maximum nb. of memory segment. */
+#define DOM0_MEMBLOCK_SIZE          0x200000 /**< Maximum nb. of memory block(2M). */
+#define DOM0_CONFIG_MEMSIZE         4096     /**< Maximum config memory size(4G). */
+#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
+
+#define RTE_DOM0_IOCTL_PREPARE_MEMSEG    _IOWR(0, 1 , struct memory_info)
+#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG  _IOWR(0, 2 , char *)
+#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG    _IOWR(0, 3, int)
+#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO   _IOWR(0, 4, void *)
+
+/**
+ * A structure used to store memory information.
+ */
+struct memory_info {
+	char name[DOM0_NAME_MAX];
+	uint64_t size;
+};
+
+/**
+ * A structure used to store memory segment information.
+ */
+struct memseg_info {
+	uint32_t idx;
+	uint64_t pfn;
+	uint64_t size;
+	uint64_t mfn[DOM0_NUM_MEMBLOCK];
+};
+
+/**
+ * A structure used to store memory block information.
+ */
+struct memblock_info {
+	uint8_t  exchange_flag;
+	uint64_t vir_addr;
+	uint64_t pfn;
+	uint64_t mfn;
+};
+#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
new file mode 100644
index 00000000..c1995ee1
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
@@ -0,0 +1,137 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+#ifndef _RTE_BSDAPP_INTERRUPTS_H_
+#define _RTE_BSDAPP_INTERRUPTS_H_
+
+#define RTE_INTR_VEC_ZERO_OFFSET      0
+#define RTE_INTR_VEC_RXTX_OFFSET      1
+
+#define RTE_MAX_RXTX_INTR_VEC_ID     32
+
+enum rte_intr_handle_type {
+	RTE_INTR_HANDLE_UNKNOWN = 0,
+	RTE_INTR_HANDLE_UIO,      /**< uio device handle */
+	RTE_INTR_HANDLE_ALARM,    /**< alarm handle */
+	RTE_INTR_HANDLE_MAX
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+	int fd;                          /**< file descriptor */
+	int uio_cfg_fd;                  /**< UIO config file descriptor */
+	enum rte_intr_handle_type type;  /**< handle type */
+	int max_intr;                    /**< max interrupt requested */
+	uint32_t nb_efd;                 /**< number of available efds */
+	int *intr_vec;                   /**< intr vector number array */
+};
+
+/**
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {ADD, DEL}.
+ * @param vec
+ *   RX intr vector number added to the epoll instance wait list.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+		int epfd, int op, unsigned int vec, void *data);
+
+/**
+ * It enables the fastpath event fds if it's necessary.
+ * It creates event fds when multi-vectors allowed,
+ * otherwise it multiplexes the single event fds.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ * @param nb_efd
+ *   Number of interrupt vector trying to enable.
+ *   The value 0 is not allowed.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+
+/**
+ * It disable the fastpath event fds.
+ * It deletes registered eventfds and closes the open fds.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+
+/**
+ * The fastpath interrupt is enabled or not.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
+
+/**
+ * The interrupt handle instance allows other cause or not.
+ * Other cause stands for none fastpath interrupt.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+int rte_intr_allow_others(struct rte_intr_handle *intr_handle);
+
+/**
+ * The multiple interrupt vector capability of interrupt handle instance.
+ * It returns zero if no multiple interrupt vector support.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+
+#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
new file mode 100644
index 00000000..58c2951e
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -0,0 +1,153 @@
+DPDK_2.0 {
+	global:
+
+	__rte_panic;
+	devargs_list;
+	eal_parse_sysfs_value;
+	eal_timer_source;
+	lcore_config;
+	pci_device_list;
+	pci_driver_list;
+	per_lcore__lcore_id;
+	per_lcore__rte_errno;
+	rte_calloc;
+	rte_calloc_socket;
+	rte_cpu_check_supported;
+	rte_cpu_get_flag_enabled;
+	rte_cycles_vmware_tsc_map;
+	rte_delay_us;
+	rte_dump_physmem_layout;
+	rte_dump_registers;
+	rte_dump_stack;
+	rte_dump_tailq;
+	rte_eal_alarm_cancel;
+	rte_eal_alarm_set;
+	rte_eal_dev_init;
+	rte_eal_devargs_add;
+	rte_eal_devargs_dump;
+	rte_eal_devargs_type_count;
+	rte_eal_driver_register;
+	rte_eal_driver_unregister;
+	rte_eal_get_configuration;
+	rte_eal_get_lcore_state;
+	rte_eal_get_physmem_layout;
+	rte_eal_get_physmem_size;
+	rte_eal_has_hugepages;
+	rte_eal_hpet_init;
+	rte_eal_init;
+	rte_eal_iopl_init;
+	rte_eal_lcore_role;
+	rte_eal_mp_remote_launch;
+	rte_eal_mp_wait_lcore;
+	rte_eal_parse_devargs_str;
+	rte_eal_pci_dump;
+	rte_eal_pci_probe;
+	rte_eal_pci_probe_one;
+	rte_eal_pci_register;
+	rte_eal_pci_scan;
+	rte_eal_pci_unregister;
+	rte_eal_process_type;
+	rte_eal_remote_launch;
+	rte_eal_tailq_lookup;
+	rte_eal_tailq_register;
+	rte_eal_vdev_init;
+	rte_eal_vdev_uninit;
+	rte_eal_wait_lcore;
+	rte_exit;
+	rte_free;
+	rte_get_hpet_cycles;
+	rte_get_hpet_hz;
+	rte_get_log_level;
+	rte_get_log_type;
+	rte_get_tsc_hz;
+	rte_hexdump;
+	rte_intr_callback_register;
+	rte_intr_callback_unregister;
+	rte_intr_disable;
+	rte_intr_enable;
+	rte_log;
+	rte_log_add_in_history;
+	rte_log_cur_msg_loglevel;
+	rte_log_cur_msg_logtype;
+	rte_log_dump_history;
+	rte_log_set_history;
+	rte_logs;
+	rte_malloc;
+	rte_malloc_dump_stats;
+	rte_malloc_get_socket_stats;
+	rte_malloc_set_limit;
+	rte_malloc_socket;
+	rte_malloc_validate;
+	rte_malloc_virt2phy;
+	rte_mem_lock_page;
+	rte_mem_phy2mch;
+	rte_mem_virt2phy;
+	rte_memdump;
+	rte_memory_get_nchannel;
+	rte_memory_get_nrank;
+	rte_memzone_dump;
+	rte_memzone_lookup;
+	rte_memzone_reserve;
+	rte_memzone_reserve_aligned;
+	rte_memzone_reserve_bounded;
+	rte_memzone_walk;
+	rte_openlog_stream;
+	rte_realloc;
+	rte_set_application_usage_hook;
+	rte_set_log_level;
+	rte_set_log_type;
+	rte_socket_id;
+	rte_strerror;
+	rte_strsplit;
+	rte_sys_gettid;
+	rte_thread_get_affinity;
+	rte_thread_set_affinity;
+	rte_vlog;
+	rte_xen_dom0_memory_attach;
+	rte_xen_dom0_memory_init;
+	rte_zmalloc;
+	rte_zmalloc_socket;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_eal_pci_detach;
+	rte_eal_pci_read_config;
+	rte_eal_pci_write_config;
+	rte_intr_allow_others;
+	rte_intr_dp_is_en;
+	rte_intr_efd_disable;
+	rte_intr_efd_enable;
+	rte_intr_rx_ctl;
+	rte_memzone_free;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+	global:
+
+	rte_intr_cap_multiple;
+	rte_keepalive_create;
+	rte_keepalive_dispatch_pings;
+	rte_keepalive_mark_alive;
+	rte_keepalive_register_core;
+	rte_xen_dom0_supported;
+
+} DPDK_2.1;
+
+DPDK_16.04 {
+	global:
+
+	rte_cpu_get_flag_name;
+	rte_eal_pci_ioport_map;
+	rte_eal_pci_ioport_read;
+	rte_eal_pci_ioport_unmap;
+	rte_eal_pci_ioport_write;
+	rte_eal_pci_map_device;
+	rte_eal_pci_unmap_device;
+	rte_eal_primary_proc_alive;
+
+} DPDK_2.2;
diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
new file mode 100644
index 00000000..5454ed85
--- /dev/null
+++ b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
@@ -0,0 +1,36 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+KMOD=	nic_uio
+SRCS=	nic_uio.c device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile
new file mode 100644
index 00000000..89957615
--- /dev/null
+++ b/lib/librte_eal/bsdapp/nic_uio/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = nic_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR)
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := nic_uio.c
+
+include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
new file mode 100644
index 00000000..99a4975c
--- /dev/null
+++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
@@ -0,0 +1,335 @@
+/* -
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/module.h>
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct */
+#include <sys/bus.h> /* structs, prototypes for pci bus stuff and DEVMETHOD */
+#include <sys/rman.h>
+#include <sys/systm.h>
+#include <sys/rwlock.h>
+#include <sys/proc.h>
+
+#include <machine/bus.h>
+#include <dev/pci/pcivar.h> /* For pci_get macros! */
+#include <dev/pci/pcireg.h> /* The softc holds our per-instance data. */
+#include <vm/vm.h>
+#include <vm/uma.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+
+#define MAX_BARS (PCIR_MAX_BAR_0 + 1)
+
+#define MAX_DETACHED_DEVICES	128
+static device_t detached_devices[MAX_DETACHED_DEVICES] = {};
+static int num_detached = 0;
+
+struct nic_uio_softc {
+	device_t        dev_t;
+	struct cdev     *my_cdev;
+	int              bar_id[MAX_BARS];
+	struct resource *bar_res[MAX_BARS];
+	u_long           bar_start[MAX_BARS];
+	u_long           bar_size[MAX_BARS];
+};
+
+/* Function prototypes */
+static d_open_t         nic_uio_open;
+static d_close_t        nic_uio_close;
+static d_mmap_t         nic_uio_mmap;
+static d_mmap_single_t  nic_uio_mmap_single;
+static int              nic_uio_probe(device_t dev);
+static int              nic_uio_attach(device_t dev);
+static int              nic_uio_detach(device_t dev);
+static int              nic_uio_shutdown(void);
+static int              nic_uio_modevent(module_t mod, int type, void *arg);
+
+static struct cdevsw uio_cdevsw = {
+		.d_name        = "nic_uio",
+		.d_version     = D_VERSION,
+		.d_open        = nic_uio_open,
+		.d_close       = nic_uio_close,
+		.d_mmap        = nic_uio_mmap,
+		.d_mmap_single = nic_uio_mmap_single,
+};
+
+static device_method_t nic_uio_methods[] = {
+	DEVMETHOD(device_probe,    nic_uio_probe),
+	DEVMETHOD(device_attach,   nic_uio_attach),
+	DEVMETHOD(device_detach,   nic_uio_detach),
+	DEVMETHOD_END
+};
+
+struct device {
+    int vend;
+    int dev;
+};
+
+struct pci_bdf {
+	uint32_t bus;
+	uint32_t devid;
+	uint32_t function;
+};
+
+static devclass_t nic_uio_devclass;
+
+DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc));
+DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0);
+
+static int
+nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
+		int prot, vm_memattr_t *memattr)
+{
+	*paddr = offset;
+	return 0;
+}
+
+static int
+nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+		struct vm_object **obj, int nprot)
+{
+	/*
+	 * The BAR index is encoded in the offset.  Divide the offset by
+	 *  PAGE_SIZE to get the index of the bar requested by the user
+	 *  app.
+	 */
+	unsigned bar = *offset/PAGE_SIZE;
+	struct nic_uio_softc *sc = cdev->si_drv1;
+
+	if (bar >= MAX_BARS)
+		return EINVAL;
+
+	if (sc->bar_res[bar] == NULL) {
+		sc->bar_id[bar] = PCIR_BAR(bar);
+
+		if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4)))
+			sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT,
+					&sc->bar_id[bar], RF_ACTIVE);
+		else
+			sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY,
+					&sc->bar_id[bar], RF_ACTIVE);
+	}
+	if (sc->bar_res[bar] == NULL)
+		return ENXIO;
+
+	sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]);
+	sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]);
+
+	device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar,
+			sc->bar_start[bar], sc->bar_size[bar]);
+
+	*offset = sc->bar_start[bar];
+	*obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
+				curthread->td_ucred);
+	return 0;
+}
+
+
+int
+nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+	return 0;
+}
+
+int
+nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+	return 0;
+}
+
+static int
+nic_uio_probe (device_t dev)
+{
+	int i;
+	unsigned int bus = pci_get_bus(dev);
+	unsigned int device = pci_get_slot(dev);
+	unsigned int function = pci_get_function(dev);
+
+	for (i = 0; i < num_detached; i++)
+		if (bus == pci_get_bus(detached_devices[i]) &&
+		    device == pci_get_slot(detached_devices[i]) &&
+		    function == pci_get_function(detached_devices[i])) {
+			device_set_desc(dev, "DPDK PCI Device");
+			return BUS_PROBE_SPECIFIC;
+		}
+
+	return ENXIO;
+}
+
+static int
+nic_uio_attach(device_t dev)
+{
+	int i;
+	struct nic_uio_softc *sc;
+
+	sc = device_get_softc(dev);
+	sc->dev_t = dev;
+	sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev),
+			UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u",
+			pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
+	if (sc->my_cdev == NULL)
+		return ENXIO;
+	sc->my_cdev->si_drv1 = sc;
+
+	for (i = 0; i < MAX_BARS; i++)
+		sc->bar_res[i] = NULL;
+
+	pci_enable_busmaster(dev);
+
+	return 0;
+}
+
+static int
+nic_uio_detach(device_t dev)
+{
+	int i;
+	struct nic_uio_softc *sc;
+	sc = device_get_softc(dev);
+
+	for (i = 0; i < MAX_BARS; i++)
+		if (sc->bar_res[i] != NULL) {
+
+			if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4)))
+				bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i],
+						sc->bar_res[i]);
+			else
+				bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i],
+						sc->bar_res[i]);
+		}
+
+	if (sc->my_cdev != NULL)
+		destroy_dev(sc->my_cdev);
+	return 0;
+}
+
+static void
+nic_uio_load(void)
+{
+	uint32_t bus, device, function;
+	device_t dev;
+	char bdf_str[256];
+	char *token, *remaining;
+
+	memset(bdf_str, 0, sizeof(bdf_str));
+	TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
+	remaining = bdf_str;
+	/*
+	 * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".
+	 *  But the code below does not try differentiate between : and ,
+	 *  and just blindly uses 3 tokens at a time to construct a
+	 *  bus/device/function tuple.
+	 *
+	 * There is no checking on strtol() return values, but this should
+	 *  be OK.  Worst case is it cannot convert and returns 0.  This
+	 *  could give us a different BDF than intended, but as long as the
+	 *  PCI device/vendor ID does not match it will not matter.
+	 */
+	while (1) {
+		if (remaining == NULL || remaining[0] == '\0')
+			break;
+		token = strsep(&remaining, ",:");
+		if (token == NULL)
+			break;
+		bus = strtol(token, NULL, 10);
+		token = strsep(&remaining, ",:");
+		if (token == NULL)
+			break;
+		device = strtol(token, NULL, 10);
+		token = strsep(&remaining, ",:");
+		if (token == NULL)
+			break;
+		function = strtol(token, NULL, 10);
+
+		dev = pci_find_bsf(bus, device, function);
+		if (dev == NULL)
+			continue;
+
+		if (num_detached < MAX_DETACHED_DEVICES) {
+			printf("nic_uio_load: detaching and storing dev=%p\n",
+			       dev);
+			detached_devices[num_detached++] = dev;
+		} else {
+			printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
+			       MAX_DETACHED_DEVICES, dev);
+		}
+		device_detach(dev);
+	}
+}
+
+static void
+nic_uio_unload(void)
+{
+	int i;
+	printf("nic_uio_unload: entered...\n");
+
+	for (i = 0; i < num_detached; i++) {
+		printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n",
+			detached_devices[i]);
+		device_probe_and_attach(detached_devices[i]);
+		printf("nic_uio_unload: done.\n");
+	}
+
+	printf("nic_uio_unload: leaving...\n");
+}
+
+static int
+nic_uio_shutdown(void)
+{
+	return 0;
+}
+
+static int
+nic_uio_modevent(module_t mod, int type, void *arg)
+{
+
+	switch (type) {
+	case MOD_LOAD:
+		nic_uio_load();
+		break;
+	case MOD_UNLOAD:
+		nic_uio_unload();
+		break;
+	case MOD_SHUTDOWN:
+		nic_uio_shutdown();
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
new file mode 100644
index 00000000..f5ea0ee8
--- /dev/null
+++ b/lib/librte_eal/common/Makefile
@@ -0,0 +1,61 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+INC := rte_branch_prediction.h rte_common.h
+INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
+INC += rte_tailq.h rte_interrupts.h rte_alarm.h
+INC += rte_string_fns.h rte_version.h
+INC += rte_eal_memconfig.h rte_malloc_heap.h
+INC += rte_hexdump.h rte_devargs.h rte_dev.h
+INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
+INC += rte_malloc.h rte_keepalive.h rte_time.h
+
+ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
+INC += rte_warnings.h
+endif
+
+GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
+GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
+# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
+ARCH_DIR ?= $(RTE_ARCH)
+ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
+	$(addprefix include/arch/$(ARCH_DIR)/,$(ARCH_INC))
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/generic := \
+	$(addprefix include/generic/,$(GENERIC_INC))
+
+include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
new file mode 100644
index 00000000..23240efd
--- /dev/null
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -0,0 +1,179 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+
+#ifndef AT_PLATFORM
+#define AT_PLATFORM 15
+#endif
+
+enum cpu_register_t {
+	REG_NONE = 0,
+	REG_HWCAP,
+	REG_HWCAP2,
+	REG_PLATFORM,
+	REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+/**
+ * Struct to hold a processor feature entry
+ */
+struct feature_entry {
+	uint32_t reg;
+	uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+	char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+	[RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+#ifdef RTE_ARCH_ARMv7
+#define PLATFORM_STR "v7l"
+typedef Elf32_auxv_t _Elfx_auxv_t;
+
+const struct feature_entry rte_cpu_feature_table[] = {
+	FEAT_DEF(SWP,       REG_HWCAP,    0)
+	FEAT_DEF(HALF,      REG_HWCAP,    1)
+	FEAT_DEF(THUMB,     REG_HWCAP,    2)
+	FEAT_DEF(A26BIT,    REG_HWCAP,    3)
+	FEAT_DEF(FAST_MULT, REG_HWCAP,    4)
+	FEAT_DEF(FPA,       REG_HWCAP,    5)
+	FEAT_DEF(VFP,       REG_HWCAP,    6)
+	FEAT_DEF(EDSP,      REG_HWCAP,    7)
+	FEAT_DEF(JAVA,      REG_HWCAP,    8)
+	FEAT_DEF(IWMMXT,    REG_HWCAP,    9)
+	FEAT_DEF(CRUNCH,    REG_HWCAP,   10)
+	FEAT_DEF(THUMBEE,   REG_HWCAP,   11)
+	FEAT_DEF(NEON,      REG_HWCAP,   12)
+	FEAT_DEF(VFPv3,     REG_HWCAP,   13)
+	FEAT_DEF(VFPv3D16,  REG_HWCAP,   14)
+	FEAT_DEF(TLS,       REG_HWCAP,   15)
+	FEAT_DEF(VFPv4,     REG_HWCAP,   16)
+	FEAT_DEF(IDIVA,     REG_HWCAP,   17)
+	FEAT_DEF(IDIVT,     REG_HWCAP,   18)
+	FEAT_DEF(VFPD32,    REG_HWCAP,   19)
+	FEAT_DEF(LPAE,      REG_HWCAP,   20)
+	FEAT_DEF(EVTSTRM,   REG_HWCAP,   21)
+	FEAT_DEF(AES,       REG_HWCAP2,   0)
+	FEAT_DEF(PMULL,     REG_HWCAP2,   1)
+	FEAT_DEF(SHA1,      REG_HWCAP2,   2)
+	FEAT_DEF(SHA2,      REG_HWCAP2,   3)
+	FEAT_DEF(CRC32,     REG_HWCAP2,   4)
+	FEAT_DEF(V7L,       REG_PLATFORM, 0)
+};
+
+#elif defined RTE_ARCH_ARM64
+#define PLATFORM_STR "aarch64"
+typedef Elf64_auxv_t _Elfx_auxv_t;
+
+const struct feature_entry rte_cpu_feature_table[] = {
+	FEAT_DEF(FP,		REG_HWCAP,    0)
+	FEAT_DEF(NEON,		REG_HWCAP,    1)
+	FEAT_DEF(EVTSTRM,	REG_HWCAP,    2)
+	FEAT_DEF(AES,		REG_HWCAP,    3)
+	FEAT_DEF(PMULL,		REG_HWCAP,    4)
+	FEAT_DEF(SHA1,		REG_HWCAP,    5)
+	FEAT_DEF(SHA2,		REG_HWCAP,    6)
+	FEAT_DEF(CRC32,		REG_HWCAP,    7)
+	FEAT_DEF(ATOMICS,	REG_HWCAP,    8)
+	FEAT_DEF(AARCH64,	REG_PLATFORM, 1)
+};
+#endif /* RTE_ARCH */
+
+/*
+ * Read AUXV software register and get cpu features for ARM
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+	int auxv_fd;
+	_Elfx_auxv_t auxv;
+
+	auxv_fd = open("/proc/self/auxv", O_RDONLY);
+	assert(auxv_fd);
+	while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
+		if (auxv.a_type == AT_HWCAP) {
+			out[REG_HWCAP] = auxv.a_un.a_val;
+		} else if (auxv.a_type == AT_HWCAP2) {
+			out[REG_HWCAP2] = auxv.a_un.a_val;
+		} else if (auxv.a_type == AT_PLATFORM) {
+			if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR))
+				out[REG_PLATFORM] = 0x0001;
+		}
+	}
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+	const struct feature_entry *feat;
+	hwcap_registers_t regs = {0};
+
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return -ENOENT;
+
+	feat = &rte_cpu_feature_table[feature];
+	if (feat->reg == REG_NONE)
+		return -EFAULT;
+
+	rte_cpu_get_features(regs);
+	return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return NULL;
+	return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
new file mode 100644
index 00000000..a8147c86
--- /dev/null
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -0,0 +1,147 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+	REG_NONE = 0,
+	REG_HWCAP,
+	REG_HWCAP2,
+	REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+	uint32_t reg;
+	uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+	char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+	[RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+	FEAT_DEF(PPC_LE,                 REG_HWCAP,   0)
+	FEAT_DEF(TRUE_LE,                REG_HWCAP,   1)
+	FEAT_DEF(PSERIES_PERFMON_COMPAT, REG_HWCAP,   6)
+	FEAT_DEF(VSX,                    REG_HWCAP,   7)
+	FEAT_DEF(ARCH_2_06,              REG_HWCAP,   8)
+	FEAT_DEF(POWER6_EXT,             REG_HWCAP,   9)
+	FEAT_DEF(DFP,                    REG_HWCAP,  10)
+	FEAT_DEF(PA6T,                   REG_HWCAP,  11)
+	FEAT_DEF(ARCH_2_05,              REG_HWCAP,  12)
+	FEAT_DEF(ICACHE_SNOOP,           REG_HWCAP,  13)
+	FEAT_DEF(SMT,                    REG_HWCAP,  14)
+	FEAT_DEF(BOOKE,                  REG_HWCAP,  15)
+	FEAT_DEF(CELLBE,                 REG_HWCAP,  16)
+	FEAT_DEF(POWER5_PLUS,            REG_HWCAP,  17)
+	FEAT_DEF(POWER5,                 REG_HWCAP,  18)
+	FEAT_DEF(POWER4,                 REG_HWCAP,  19)
+	FEAT_DEF(NOTB,                   REG_HWCAP,  20)
+	FEAT_DEF(EFP_DOUBLE,             REG_HWCAP,  21)
+	FEAT_DEF(EFP_SINGLE,             REG_HWCAP,  22)
+	FEAT_DEF(SPE,                    REG_HWCAP,  23)
+	FEAT_DEF(UNIFIED_CACHE,          REG_HWCAP,  24)
+	FEAT_DEF(4xxMAC,                 REG_HWCAP,  25)
+	FEAT_DEF(MMU,                    REG_HWCAP,  26)
+	FEAT_DEF(FPU,                    REG_HWCAP,  27)
+	FEAT_DEF(ALTIVEC,                REG_HWCAP,  28)
+	FEAT_DEF(PPC601,                 REG_HWCAP,  29)
+	FEAT_DEF(PPC64,                  REG_HWCAP,  30)
+	FEAT_DEF(PPC32,                  REG_HWCAP,  31)
+	FEAT_DEF(TAR,                    REG_HWCAP2, 26)
+	FEAT_DEF(LSEL,                   REG_HWCAP2, 27)
+	FEAT_DEF(EBB,                    REG_HWCAP2, 28)
+	FEAT_DEF(DSCR,                   REG_HWCAP2, 29)
+	FEAT_DEF(HTM,                    REG_HWCAP2, 30)
+	FEAT_DEF(ARCH_2_07,              REG_HWCAP2, 31)
+};
+
+/*
+ * Read AUXV software register and get cpu features for Power
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+	int auxv_fd;
+	Elf64_auxv_t auxv;
+
+	auxv_fd = open("/proc/self/auxv", O_RDONLY);
+	assert(auxv_fd);
+	while (read(auxv_fd, &auxv,
+		sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
+		if (auxv.a_type == AT_HWCAP)
+			out[REG_HWCAP] = auxv.a_un.a_val;
+		else if (auxv.a_type == AT_HWCAP2)
+			out[REG_HWCAP2] = auxv.a_un.a_val;
+	}
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+	const struct feature_entry *feat;
+	hwcap_registers_t regs = {0};
+
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return -ENOENT;
+
+	feat = &rte_cpu_feature_table[feature];
+	if (feat->reg == REG_NONE)
+		return -EFAULT;
+
+	rte_cpu_get_features(regs);
+	return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return NULL;
+	return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/librte_eal/common/arch/tile/rte_cpuflags.c b/lib/librte_eal/common/arch/tile/rte_cpuflags.c
new file mode 100644
index 00000000..a2b6c51a
--- /dev/null
+++ b/lib/librte_eal/common/arch/tile/rte_cpuflags.c
@@ -0,0 +1,47 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "rte_cpuflags.h"
+
+#include <errno.h>
+
+const struct feature_entry rte_cpu_feature_table[] = {
+};
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(__attribute__((unused)) enum rte_cpu_flag_t feature)
+{
+	return -ENOENT;
+}
diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
new file mode 100644
index 00000000..01382571
--- /dev/null
+++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
@@ -0,0 +1,220 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rte_cpuflags.h"
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+
+enum cpu_register_t {
+	RTE_REG_EAX = 0,
+	RTE_REG_EBX,
+	RTE_REG_ECX,
+	RTE_REG_EDX,
+};
+
+typedef uint32_t cpuid_registers_t[4];
+
+/**
+ * Struct to hold a processor feature entry
+ */
+struct feature_entry {
+	uint32_t leaf;				/**< cpuid leaf */
+	uint32_t subleaf;			/**< cpuid subleaf */
+	uint32_t reg;				/**< cpuid register */
+	uint32_t bit;				/**< cpuid register bit */
+#define CPU_FLAG_NAME_MAX_LEN 64
+	char name[CPU_FLAG_NAME_MAX_LEN];       /**< String for printing */
+};
+
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+	[RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
+
+const struct feature_entry rte_cpu_feature_table[] = {
+	FEAT_DEF(SSE3, 0x00000001, 0, RTE_REG_ECX,  0)
+	FEAT_DEF(PCLMULQDQ, 0x00000001, 0, RTE_REG_ECX,  1)
+	FEAT_DEF(DTES64, 0x00000001, 0, RTE_REG_ECX,  2)
+	FEAT_DEF(MONITOR, 0x00000001, 0, RTE_REG_ECX,  3)
+	FEAT_DEF(DS_CPL, 0x00000001, 0, RTE_REG_ECX,  4)
+	FEAT_DEF(VMX, 0x00000001, 0, RTE_REG_ECX,  5)
+	FEAT_DEF(SMX, 0x00000001, 0, RTE_REG_ECX,  6)
+	FEAT_DEF(EIST, 0x00000001, 0, RTE_REG_ECX,  7)
+	FEAT_DEF(TM2, 0x00000001, 0, RTE_REG_ECX,  8)
+	FEAT_DEF(SSSE3, 0x00000001, 0, RTE_REG_ECX,  9)
+	FEAT_DEF(CNXT_ID, 0x00000001, 0, RTE_REG_ECX, 10)
+	FEAT_DEF(FMA, 0x00000001, 0, RTE_REG_ECX, 12)
+	FEAT_DEF(CMPXCHG16B, 0x00000001, 0, RTE_REG_ECX, 13)
+	FEAT_DEF(XTPR, 0x00000001, 0, RTE_REG_ECX, 14)
+	FEAT_DEF(PDCM, 0x00000001, 0, RTE_REG_ECX, 15)
+	FEAT_DEF(PCID, 0x00000001, 0, RTE_REG_ECX, 17)
+	FEAT_DEF(DCA, 0x00000001, 0, RTE_REG_ECX, 18)
+	FEAT_DEF(SSE4_1, 0x00000001, 0, RTE_REG_ECX, 19)
+	FEAT_DEF(SSE4_2, 0x00000001, 0, RTE_REG_ECX, 20)
+	FEAT_DEF(X2APIC, 0x00000001, 0, RTE_REG_ECX, 21)
+	FEAT_DEF(MOVBE, 0x00000001, 0, RTE_REG_ECX, 22)
+	FEAT_DEF(POPCNT, 0x00000001, 0, RTE_REG_ECX, 23)
+	FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, RTE_REG_ECX, 24)
+	FEAT_DEF(AES, 0x00000001, 0, RTE_REG_ECX, 25)
+	FEAT_DEF(XSAVE, 0x00000001, 0, RTE_REG_ECX, 26)
+	FEAT_DEF(OSXSAVE, 0x00000001, 0, RTE_REG_ECX, 27)
+	FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28)
+	FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29)
+	FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30)
+
+	FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX,  0)
+	FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX,  1)
+	FEAT_DEF(DE, 0x00000001, 0, RTE_REG_EDX,  2)
+	FEAT_DEF(PSE, 0x00000001, 0, RTE_REG_EDX,  3)
+	FEAT_DEF(TSC, 0x00000001, 0, RTE_REG_EDX,  4)
+	FEAT_DEF(MSR, 0x00000001, 0, RTE_REG_EDX,  5)
+	FEAT_DEF(PAE, 0x00000001, 0, RTE_REG_EDX,  6)
+	FEAT_DEF(MCE, 0x00000001, 0, RTE_REG_EDX,  7)
+	FEAT_DEF(CX8, 0x00000001, 0, RTE_REG_EDX,  8)
+	FEAT_DEF(APIC, 0x00000001, 0, RTE_REG_EDX,  9)
+	FEAT_DEF(SEP, 0x00000001, 0, RTE_REG_EDX, 11)
+	FEAT_DEF(MTRR, 0x00000001, 0, RTE_REG_EDX, 12)
+	FEAT_DEF(PGE, 0x00000001, 0, RTE_REG_EDX, 13)
+	FEAT_DEF(MCA, 0x00000001, 0, RTE_REG_EDX, 14)
+	FEAT_DEF(CMOV, 0x00000001, 0, RTE_REG_EDX, 15)
+	FEAT_DEF(PAT, 0x00000001, 0, RTE_REG_EDX, 16)
+	FEAT_DEF(PSE36, 0x00000001, 0, RTE_REG_EDX, 17)
+	FEAT_DEF(PSN, 0x00000001, 0, RTE_REG_EDX, 18)
+	FEAT_DEF(CLFSH, 0x00000001, 0, RTE_REG_EDX, 19)
+	FEAT_DEF(DS, 0x00000001, 0, RTE_REG_EDX, 21)
+	FEAT_DEF(ACPI, 0x00000001, 0, RTE_REG_EDX, 22)
+	FEAT_DEF(MMX, 0x00000001, 0, RTE_REG_EDX, 23)
+	FEAT_DEF(FXSR, 0x00000001, 0, RTE_REG_EDX, 24)
+	FEAT_DEF(SSE, 0x00000001, 0, RTE_REG_EDX, 25)
+	FEAT_DEF(SSE2, 0x00000001, 0, RTE_REG_EDX, 26)
+	FEAT_DEF(SS, 0x00000001, 0, RTE_REG_EDX, 27)
+	FEAT_DEF(HTT, 0x00000001, 0, RTE_REG_EDX, 28)
+	FEAT_DEF(TM, 0x00000001, 0, RTE_REG_EDX, 29)
+	FEAT_DEF(PBE, 0x00000001, 0, RTE_REG_EDX, 31)
+
+	FEAT_DEF(DIGTEMP, 0x00000006, 0, RTE_REG_EAX,  0)
+	FEAT_DEF(TRBOBST, 0x00000006, 0, RTE_REG_EAX,  1)
+	FEAT_DEF(ARAT, 0x00000006, 0, RTE_REG_EAX,  2)
+	FEAT_DEF(PLN, 0x00000006, 0, RTE_REG_EAX,  4)
+	FEAT_DEF(ECMD, 0x00000006, 0, RTE_REG_EAX,  5)
+	FEAT_DEF(PTM, 0x00000006, 0, RTE_REG_EAX,  6)
+
+	FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, RTE_REG_ECX,  0)
+	FEAT_DEF(ACNT2, 0x00000006, 0, RTE_REG_ECX,  1)
+	FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX,  3)
+
+	FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX,  0)
+	FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX,  2)
+	FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX,  4)
+	FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX,  5)
+	FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX,  6)
+	FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX,  7)
+	FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX,  8)
+	FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10)
+	FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11)
+	FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16)
+
+	FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX,  0)
+	FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX,  4)
+
+	FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11)
+	FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20)
+	FEAT_DEF(1GB_PG, 0x80000001, 0, RTE_REG_EDX, 26)
+	FEAT_DEF(RDTSCP, 0x80000001, 0, RTE_REG_EDX, 27)
+	FEAT_DEF(EM64T, 0x80000001, 0, RTE_REG_EDX, 29)
+
+	FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX,  8)
+};
+
+/*
+ * Execute CPUID instruction and get contents of a specific register
+ *
+ * This function, when compiled with GCC, will generate architecture-neutral
+ * code, as per GCC manual.
+ */
+static void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
+{
+#if defined(__i386__) && defined(__PIC__)
+	/* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+	asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+		 : "=r" (out[RTE_REG_EBX]),
+		   "=a" (out[RTE_REG_EAX]),
+		   "=c" (out[RTE_REG_ECX]),
+		   "=d" (out[RTE_REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
+#else
+	asm volatile("cpuid"
+		 : "=a" (out[RTE_REG_EAX]),
+		   "=b" (out[RTE_REG_EBX]),
+		   "=c" (out[RTE_REG_ECX]),
+		   "=d" (out[RTE_REG_EDX])
+		 : "a" (leaf), "c" (subleaf));
+#endif
+}
+
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+	const struct feature_entry *feat;
+	cpuid_registers_t regs;
+
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		/* Flag does not match anything in the feature tables */
+		return -ENOENT;
+
+	feat = &rte_cpu_feature_table[feature];
+
+	if (!feat->leaf)
+		/* This entry in the table wasn't filled out! */
+		return -EFAULT;
+
+	rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
+	if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) ||
+	      regs[RTE_REG_EAX] < feat->leaf)
+		return 0;
+
+	/* get the cpuid leaf containing the desired feature */
+	rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
+
+	/* check if the feature is enabled */
+	return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+	if (feature >= RTE_CPUFLAG_NUMFLAGS)
+		return NULL;
+	return rte_cpu_feature_table[feature].name;
+}
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
new file mode 100644
index 00000000..ecb12409
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+/**
+ * Checks if the machine is adequate for running the binary. If it is not, the
+ * program exits with status 1.
+ * The function attribute forces this function to be called before main(). But
+ * with ICC, the check is generated by the compiler.
+ */
+#ifndef __INTEL_COMPILER
+void __attribute__ ((__constructor__))
+#else
+void
+#endif
+rte_cpu_check_supported(void)
+{
+	/* This is generated at compile-time by the build system */
+	static const enum rte_cpu_flag_t compile_time_flags[] = {
+			RTE_COMPILE_TIME_CPUFLAGS
+	};
+	unsigned count = RTE_DIM(compile_time_flags), i;
+	int ret;
+
+	for (i = 0; i < count; i++) {
+		ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+		if (ret < 0) {
+			fprintf(stderr,
+				"ERROR: CPU feature flag lookup failed with error %d\n",
+				ret);
+			exit(1);
+		}
+		if (!ret) {
+			fprintf(stderr,
+			        "ERROR: This system does not support \"%s\".\n"
+			        "Please check that RTE_MACHINE is set correctly.\n",
+			        rte_cpu_get_flag_name(compile_time_flags[i]));
+			exit(1);
+		}
+	}
+}
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
new file mode 100644
index 00000000..a8a4146c
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -0,0 +1,152 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_devargs.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+/** Global list of device drivers. */
+static struct rte_driver_list dev_driver_list =
+	TAILQ_HEAD_INITIALIZER(dev_driver_list);
+
+/* register a driver */
+void
+rte_eal_driver_register(struct rte_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&dev_driver_list, driver, next);
+}
+
+/* unregister a driver */
+void
+rte_eal_driver_unregister(struct rte_driver *driver)
+{
+	TAILQ_REMOVE(&dev_driver_list, driver, next);
+}
+
+int
+rte_eal_vdev_init(const char *name, const char *args)
+{
+	struct rte_driver *driver;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	TAILQ_FOREACH(driver, &dev_driver_list, next) {
+		if (driver->type != PMD_VDEV)
+			continue;
+
+		/*
+		 * search a driver prefix in virtual device name.
+		 * For example, if the driver is pcap PMD, driver->name
+		 * will be "eth_pcap", but "name" will be "eth_pcapN".
+		 * So use strncmp to compare.
+		 */
+		if (!strncmp(driver->name, name, strlen(driver->name)))
+			return driver->init(name, args);
+	}
+
+	RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+	return -EINVAL;
+}
+
+int
+rte_eal_dev_init(void)
+{
+	struct rte_devargs *devargs;
+	struct rte_driver *driver;
+
+	/*
+	 * Note that the dev_driver_list is populated here
+	 * from calls made to rte_eal_driver_register from constructor functions
+	 * embedded into PMD modules via the PMD_REGISTER_DRIVER macro
+	 */
+
+	/* call the init function for each virtual device */
+	TAILQ_FOREACH(devargs, &devargs_list, next) {
+
+		if (devargs->type != RTE_DEVTYPE_VIRTUAL)
+			continue;
+
+		if (rte_eal_vdev_init(devargs->virt.drv_name,
+					devargs->args)) {
+			RTE_LOG(ERR, EAL, "failed to initialize %s device\n",
+					devargs->virt.drv_name);
+			return -1;
+		}
+	}
+
+	/* Once the vdevs are initalized, start calling all the pdev drivers */
+	TAILQ_FOREACH(driver, &dev_driver_list, next) {
+		if (driver->type != PMD_PDEV)
+			continue;
+		/* PDEV drivers don't get passed any parameters */
+		driver->init(NULL, NULL);
+	}
+	return 0;
+}
+
+int
+rte_eal_vdev_uninit(const char *name)
+{
+	struct rte_driver *driver;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	TAILQ_FOREACH(driver, &dev_driver_list, next) {
+		if (driver->type != PMD_VDEV)
+			continue;
+
+		/*
+		 * search a driver prefix in virtual device name.
+		 * For example, if the driver is pcap PMD, driver->name
+		 * will be "eth_pcap", but "name" will be "eth_pcapN".
+		 * So use strncmp to compare.
+		 */
+		if (!strncmp(driver->name, name, strlen(driver->name)))
+			return driver->uninit(name);
+	}
+
+	RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+	return -EINVAL;
+}
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
new file mode 100644
index 00000000..2bfe54a1
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -0,0 +1,176 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2014 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This file manages the list of devices and their arguments, as given
+ * by the user at startup
+ *
+ * Code here should not call rte_log since the EAL environment
+ * may not be initialized.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_pci.h>
+#include <rte_devargs.h>
+#include "eal_private.h"
+
+/** Global list of user devices */
+struct rte_devargs_list devargs_list =
+	TAILQ_HEAD_INITIALIZER(devargs_list);
+
+int
+rte_eal_parse_devargs_str(const char *devargs_str,
+			char **drvname, char **drvargs)
+{
+	char *sep;
+
+	if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL))
+		return -1;
+
+	*drvname = strdup(devargs_str);
+	if (drvname == NULL)
+		return -1;
+
+	/* set the first ',' to '\0' to split name and arguments */
+	sep = strchr(*drvname, ',');
+	if (sep != NULL) {
+		sep[0] = '\0';
+		*drvargs = strdup(sep + 1);
+	} else {
+		*drvargs = strdup("");
+	}
+
+	if (*drvargs == NULL) {
+		free(*drvname);
+		return -1;
+	}
+	return 0;
+}
+
+/* store a whitelist parameter for later parsing */
+int
+rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+{
+	struct rte_devargs *devargs = NULL;
+	char *buf = NULL;
+	int ret;
+
+	/* use malloc instead of rte_malloc as it's called early at init */
+	devargs = malloc(sizeof(*devargs));
+	if (devargs == NULL)
+		goto fail;
+
+	memset(devargs, 0, sizeof(*devargs));
+	devargs->type = devtype;
+
+	if (rte_eal_parse_devargs_str(devargs_str, &buf, &devargs->args))
+		goto fail;
+
+	switch (devargs->type) {
+	case RTE_DEVTYPE_WHITELISTED_PCI:
+	case RTE_DEVTYPE_BLACKLISTED_PCI:
+		/* try to parse pci identifier */
+		if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 &&
+		    eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0)
+			goto fail;
+
+		break;
+	case RTE_DEVTYPE_VIRTUAL:
+		/* save driver name */
+		ret = snprintf(devargs->virt.drv_name,
+			       sizeof(devargs->virt.drv_name), "%s", buf);
+		if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name))
+			goto fail;
+
+		break;
+	}
+
+	free(buf);
+	TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
+	return 0;
+
+fail:
+	free(buf);
+	if (devargs) {
+		free(devargs->args);
+		free(devargs);
+	}
+
+	return -1;
+}
+
+/* count the number of devices of a specified type */
+unsigned int
+rte_eal_devargs_type_count(enum rte_devtype devtype)
+{
+	struct rte_devargs *devargs;
+	unsigned int count = 0;
+
+	TAILQ_FOREACH(devargs, &devargs_list, next) {
+		if (devargs->type != devtype)
+			continue;
+		count++;
+	}
+	return count;
+}
+
+/* dump the user devices on the console */
+void
+rte_eal_devargs_dump(FILE *f)
+{
+	struct rte_devargs *devargs;
+
+	fprintf(f, "User device white list:\n");
+	TAILQ_FOREACH(devargs, &devargs_list, next) {
+		if (devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
+			fprintf(f, "  PCI whitelist " PCI_PRI_FMT " %s\n",
+			       devargs->pci.addr.domain,
+			       devargs->pci.addr.bus,
+			       devargs->pci.addr.devid,
+			       devargs->pci.addr.function,
+			       devargs->args);
+		else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
+			fprintf(f, "  PCI blacklist " PCI_PRI_FMT " %s\n",
+			       devargs->pci.addr.domain,
+			       devargs->pci.addr.bus,
+			       devargs->pci.addr.devid,
+			       devargs->pci.addr.function,
+			       devargs->args);
+		else if (devargs->type == RTE_DEVTYPE_VIRTUAL)
+			fprintf(f, "  VIRTUAL %s %s\n",
+			       devargs->virt.drv_name,
+			       devargs->args);
+		else
+			fprintf(f, "  UNKNOWN %s\n", devargs->args);
+	}
+}
diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c
new file mode 100644
index 00000000..de48d8e4
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_errno.c
@@ -0,0 +1,72 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+RTE_DEFINE_PER_LCORE(int, _rte_errno);
+
+const char *
+rte_strerror(int errnum)
+{
+#define RETVAL_SZ 256
+	static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+
+	/* since some implementations of strerror_r throw an error
+	 * themselves if errnum is too big, we handle that case here */
+	if (errnum > RTE_MAX_ERRNO)
+		snprintf(RTE_PER_LCORE(retval), RETVAL_SZ,
+#ifdef RTE_EXEC_ENV_BSDAPP
+				"Unknown error: %d", errnum);
+#else
+				"Unknown error %d", errnum);
+#endif
+	else
+		switch (errnum){
+		case E_RTE_SECONDARY:
+			return "Invalid call in secondary process";
+		case E_RTE_NO_CONFIG:
+			return "Missing rte_config structure";
+		default:
+			strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ);
+		}
+
+	return RTE_PER_LCORE(retval);
+}
diff --git a/lib/librte_eal/common/eal_common_hexdump.c b/lib/librte_eal/common/eal_common_hexdump.c
new file mode 100644
index 00000000..d5cbd703
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_hexdump.c
@@ -0,0 +1,120 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <rte_hexdump.h>
+#include <rte_string_fns.h>
+
+#define LINE_LEN 128
+
+/**************************************************************************//**
+*
+* rte_hexdump - Dump out memory in a special hex dump format.
+*
+* DESCRIPTION
+* Dump out the message buffer in a special hex dump output format with characters
+* printed for each line of 16 hex values.
+*
+* RETURNS: N/A
+*
+* SEE ALSO:
+*/
+
+void
+rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len)
+{
+    unsigned int i, out, ofs;
+    const unsigned char *data = buf;
+    char line[LINE_LEN];    /* space needed 8+16*3+3+16 == 75 */
+
+    fprintf(f, "%s at [%p], len=%u\n", (title)? title  : "  Dump data", data, len);
+    ofs = 0;
+    while (ofs < len) {
+        /* format the line in the buffer, then use printf to output to screen */
+        out = snprintf(line, LINE_LEN, "%08X:", ofs);
+        for (i = 0; ((ofs + i) < len) && (i < 16); i++)
+            out += snprintf(line+out, LINE_LEN - out, " %02X", (data[ofs+i] & 0xff));
+        for(; i <= 16; i++)
+            out += snprintf(line+out, LINE_LEN - out, " | ");
+        for(i = 0; (ofs < len) && (i < 16); i++, ofs++) {
+            unsigned char c = data[ofs];
+            if ( (c < ' ') || (c > '~'))
+                c = '.';
+            out += snprintf(line+out, LINE_LEN - out, "%c", c);
+        }
+        fprintf(f, "%s\n", line);
+    }
+    fflush(f);
+}
+
+/**************************************************************************//**
+*
+* rte_memdump - Dump out memory in hex bytes with colons.
+*
+* DESCRIPTION
+* Dump out the message buffer in hex bytes with colons xx:xx:xx:xx:...
+*
+* RETURNS: N/A
+*
+* SEE ALSO:
+*/
+
+void
+rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len)
+{
+    unsigned int i, out;
+    const unsigned char *data = buf;
+    char line[LINE_LEN];
+
+    if ( title )
+	fprintf(f, "%s: ", title);
+
+    line[0] = '\0';
+    for (i = 0, out = 0; i < len; i++) {
+	// Make sure we do not overrun the line buffer length.
+		if ( out >= (LINE_LEN - 4) ) {
+			fprintf(f, "%s", line);
+			out = 0;
+			line[out] = '\0';
+		}
+		out += snprintf(line+out, LINE_LEN - out, "%02x%s",
+				(data[i] & 0xff), ((i+1) < len)? ":" : "");
+    }
+    if ( out > 0 )
+	fprintf(f, "%s", line);
+    fprintf(f, "\n");
+
+    fflush(f);
+}
diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
new file mode 100644
index 00000000..229c3a03
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_launch.c
@@ -0,0 +1,118 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+/*
+ * Wait until a lcore finished its job.
+ */
+int
+rte_eal_wait_lcore(unsigned slave_id)
+{
+	if (lcore_config[slave_id].state == WAIT)
+		return 0;
+
+	while (lcore_config[slave_id].state != WAIT &&
+	       lcore_config[slave_id].state != FINISHED);
+
+	rte_rmb();
+
+	/* we are in finished state, go to wait state */
+	lcore_config[slave_id].state = WAIT;
+	return lcore_config[slave_id].ret;
+}
+
+/*
+ * Check that every SLAVE lcores are in WAIT state, then call
+ * rte_eal_remote_launch() for all of them. If call_master is true
+ * (set to CALL_MASTER), also call the function on the master lcore.
+ */
+int
+rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
+			 enum rte_rmt_call_master_t call_master)
+{
+	int lcore_id;
+	int master = rte_get_master_lcore();
+
+	/* check state of lcores */
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (lcore_config[lcore_id].state != WAIT)
+			return -EBUSY;
+	}
+
+	/* send messages to cores */
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		rte_eal_remote_launch(f, arg, lcore_id);
+	}
+
+	if (call_master == CALL_MASTER) {
+		lcore_config[master].ret = f(arg);
+		lcore_config[master].state = FINISHED;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the state of the lcore identified by slave_id.
+ */
+enum rte_lcore_state_t
+rte_eal_get_lcore_state(unsigned lcore_id)
+{
+	return lcore_config[lcore_id].state;
+}
+
+/*
+ * Do a rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ */
+void
+rte_eal_mp_wait_lcore(void)
+{
+	unsigned lcore_id;
+
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		rte_eal_wait_lcore(lcore_id);
+	}
+}
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
new file mode 100644
index 00000000..a4263ba5
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -0,0 +1,110 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+/*
+ * Parse /sys/devices/system/cpu to get the number of physical and logical
+ * processors on the machine. The function will fill the cpu_info
+ * structure.
+ */
+int
+rte_eal_cpu_init(void)
+{
+	/* pointer to global configuration */
+	struct rte_config *config = rte_eal_get_configuration();
+	unsigned lcore_id;
+	unsigned count = 0;
+
+	/*
+	 * Parse the maximum set of logical cores, detect the subset of running
+	 * ones and enable them by default.
+	 */
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		lcore_config[lcore_id].core_index = count;
+
+		/* init cpuset for per lcore config */
+		CPU_ZERO(&lcore_config[lcore_id].cpuset);
+
+		/* in 1:1 mapping, record related cpu detected state */
+		lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id);
+		if (lcore_config[lcore_id].detected == 0) {
+			config->lcore_role[lcore_id] = ROLE_OFF;
+			lcore_config[lcore_id].core_index = -1;
+			continue;
+		}
+
+		/* By default, lcore 1:1 map to cpu id */
+		CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset);
+
+		/* By default, each detected core is enabled */
+		config->lcore_role[lcore_id] = ROLE_RTE;
+		lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
+		lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id);
+		if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES)
+#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
+			lcore_config[lcore_id].socket_id = 0;
+#else
+			rte_panic("Socket ID (%u) is greater than "
+				"RTE_MAX_NUMA_NODES (%d)\n",
+				lcore_config[lcore_id].socket_id,
+				RTE_MAX_NUMA_NODES);
+#endif
+
+		RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
+				"core %u on socket %u\n",
+				lcore_id, lcore_config[lcore_id].core_id,
+				lcore_config[lcore_id].socket_id);
+		count++;
+	}
+	/* Set the count of enabled logical cores of the EAL configuration */
+	config->lcore_count = count;
+	RTE_LOG(DEBUG, EAL,
+		"Support maximum %u logical core(s) by configuration.\n",
+		RTE_MAX_LCORE);
+	RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+
+	return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
new file mode 100644
index 00000000..1ae8de70
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -0,0 +1,337 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_debug.h>
+#include <rte_spinlock.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+
+#include "eal_private.h"
+
+#define LOG_ELT_SIZE     2048
+
+#define LOG_HISTORY_MP_NAME "log_history"
+
+STAILQ_HEAD(log_history_list, log_history);
+
+/**
+ * The structure of a message log in the log history.
+ */
+struct log_history {
+	STAILQ_ENTRY(log_history) next;
+	unsigned size;
+	char buf[0];
+};
+
+static struct rte_mempool *log_history_mp = NULL;
+static unsigned log_history_size = 0;
+static struct log_history_list log_history;
+
+/* global log structure */
+struct rte_logs rte_logs = {
+	.type = ~0,
+	.level = RTE_LOG_DEBUG,
+	.file = NULL,
+};
+
+static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER;
+static FILE *default_log_stream;
+static int history_enabled = 1;
+
+/**
+ * This global structure stores some informations about the message
+ * that is currently beeing processed by one lcore
+ */
+struct log_cur_msg {
+	uint32_t loglevel; /**< log level - see rte_log.h */
+	uint32_t logtype;  /**< log type  - see rte_log.h */
+} __rte_cache_aligned;
+static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */
+
+
+/* default logs */
+
+int
+rte_log_add_in_history(const char *buf, size_t size)
+{
+	struct log_history *hist_buf = NULL;
+	static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf);
+	void *obj;
+
+	if (history_enabled == 0)
+		return 0;
+
+	rte_spinlock_lock(&log_list_lock);
+
+	/* get a buffer for adding in history */
+	if (log_history_size > RTE_LOG_HISTORY) {
+		hist_buf = STAILQ_FIRST(&log_history);
+		if (hist_buf) {
+			STAILQ_REMOVE_HEAD(&log_history, next);
+			log_history_size--;
+		}
+	}
+	else {
+		if (rte_mempool_mc_get(log_history_mp, &obj) < 0)
+			obj = NULL;
+		hist_buf = obj;
+	}
+
+	/* no buffer */
+	if (hist_buf == NULL) {
+		rte_spinlock_unlock(&log_list_lock);
+		return -ENOBUFS;
+	}
+
+	/* not enough room for msg, buffer go back in mempool */
+	if (size >= hist_buf_size) {
+		rte_mempool_mp_put(log_history_mp, hist_buf);
+		rte_spinlock_unlock(&log_list_lock);
+		return -ENOBUFS;
+	}
+
+	/* add in history */
+	memcpy(hist_buf->buf, buf, size);
+	hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0';
+	hist_buf->size = size;
+	STAILQ_INSERT_TAIL(&log_history, hist_buf, next);
+	log_history_size++;
+	rte_spinlock_unlock(&log_list_lock);
+
+	return 0;
+}
+
+void
+rte_log_set_history(int enable)
+{
+	history_enabled = enable;
+}
+
+/* Change the stream that will be used by logging system */
+int
+rte_openlog_stream(FILE *f)
+{
+	if (f == NULL)
+		rte_logs.file = default_log_stream;
+	else
+		rte_logs.file = f;
+	return 0;
+}
+
+/* Set global log level */
+void
+rte_set_log_level(uint32_t level)
+{
+	rte_logs.level = (uint32_t)level;
+}
+
+/* Get global log level */
+uint32_t
+rte_get_log_level(void)
+{
+	return rte_logs.level;
+}
+
+/* Set global log type */
+void
+rte_set_log_type(uint32_t type, int enable)
+{
+	if (enable)
+		rte_logs.type |= type;
+	else
+		rte_logs.type &= (~type);
+}
+
+/* Get global log type */
+uint32_t
+rte_get_log_type(void)
+{
+	return rte_logs.type;
+}
+
+/* get the current loglevel for the message beeing processed */
+int rte_log_cur_msg_loglevel(void)
+{
+	unsigned lcore_id;
+	lcore_id = rte_lcore_id();
+	if (lcore_id >= RTE_MAX_LCORE)
+		return rte_get_log_level();
+	return log_cur_msg[lcore_id].loglevel;
+}
+
+/* get the current logtype for the message beeing processed */
+int rte_log_cur_msg_logtype(void)
+{
+	unsigned lcore_id;
+	lcore_id = rte_lcore_id();
+	if (lcore_id >= RTE_MAX_LCORE)
+		return rte_get_log_type();
+	return log_cur_msg[lcore_id].logtype;
+}
+
+/* Dump log history to file */
+void
+rte_log_dump_history(FILE *out)
+{
+	struct log_history_list tmp_log_history;
+	struct log_history *hist_buf;
+	unsigned i;
+
+	/* only one dump at a time */
+	rte_spinlock_lock(&log_dump_lock);
+
+	/* save list, and re-init to allow logging during dump */
+	rte_spinlock_lock(&log_list_lock);
+	tmp_log_history = log_history;
+	STAILQ_INIT(&log_history);
+	log_history_size = 0;
+	rte_spinlock_unlock(&log_list_lock);
+
+	for (i=0; i<RTE_LOG_HISTORY; i++) {
+
+		/* remove one message from history list */
+		hist_buf = STAILQ_FIRST(&tmp_log_history);
+
+		if (hist_buf == NULL)
+			break;
+
+		STAILQ_REMOVE_HEAD(&tmp_log_history, next);
+
+		/* write on stdout */
+		if (fwrite(hist_buf->buf, hist_buf->size, 1, out) == 0) {
+			rte_mempool_mp_put(log_history_mp, hist_buf);
+			break;
+		}
+
+		/* put back message structure in pool */
+		rte_mempool_mp_put(log_history_mp, hist_buf);
+	}
+	fflush(out);
+
+	rte_spinlock_unlock(&log_dump_lock);
+}
+
+/*
+ * Generates a log message The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ */
+int
+rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+{
+	int ret;
+	FILE *f = rte_logs.file;
+	unsigned lcore_id;
+
+	if ((level > rte_logs.level) || !(logtype & rte_logs.type))
+		return 0;
+
+	/* save loglevel and logtype in a global per-lcore variable */
+	lcore_id = rte_lcore_id();
+	if (lcore_id < RTE_MAX_LCORE) {
+		log_cur_msg[lcore_id].loglevel = level;
+		log_cur_msg[lcore_id].logtype = logtype;
+	}
+
+	ret = vfprintf(f, format, ap);
+	fflush(f);
+	return ret;
+}
+
+/*
+ * Generates a log message The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ * No need to check level here, done by rte_vlog().
+ */
+int
+rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, format);
+	ret = rte_vlog(level, logtype, format, ap);
+	va_end(ap);
+	return ret;
+}
+
+/*
+ * called by environment-specific log init function to initialize log
+ * history
+ */
+int
+rte_eal_common_log_init(FILE *default_log)
+{
+	STAILQ_INIT(&log_history);
+
+	/* reserve RTE_LOG_HISTORY*2 elements, so we can dump and
+	 * keep logging during this time */
+	log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2,
+				LOG_ELT_SIZE, 0, 0,
+				NULL, NULL,
+				NULL, NULL,
+				SOCKET_ID_ANY, 0);
+
+	if ((log_history_mp == NULL) &&
+	    ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){
+		RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n",
+			__func__);
+		return -1;
+	}
+
+	default_log_stream = default_log;
+	rte_openlog_stream(default_log);
+	return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
new file mode 100644
index 00000000..6155752e
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -0,0 +1,154 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+/*
+ * Return a pointer to a read-only table of struct rte_physmem_desc
+ * elements, containing the layout of all addressable physical
+ * memory. The last element of the table contains a NULL address.
+ */
+const struct rte_memseg *
+rte_eal_get_physmem_layout(void)
+{
+	return rte_eal_get_configuration()->mem_config->memseg;
+}
+
+
+/* get the total size of memory */
+uint64_t
+rte_eal_get_physmem_size(void)
+{
+	const struct rte_mem_config *mcfg;
+	unsigned i = 0;
+	uint64_t total_len = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (mcfg->memseg[i].addr == NULL)
+			break;
+
+		total_len += mcfg->memseg[i].len;
+	}
+
+	return total_len;
+}
+
+/* Dump the physical memory layout on console */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+	const struct rte_mem_config *mcfg;
+	unsigned i = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (mcfg->memseg[i].addr == NULL)
+			break;
+
+		fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, "
+		       "virt:%p, socket_id:%"PRId32", "
+		       "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+		       "nrank:%"PRIx32"\n", i,
+		       mcfg->memseg[i].phys_addr,
+		       mcfg->memseg[i].len,
+		       mcfg->memseg[i].addr,
+		       mcfg->memseg[i].socket_id,
+		       mcfg->memseg[i].hugepage_sz,
+		       mcfg->memseg[i].nchannel,
+		       mcfg->memseg[i].nrank);
+	}
+}
+
+/* return the number of memory channels */
+unsigned rte_memory_get_nchannel(void)
+{
+	return rte_eal_get_configuration()->mem_config->nchannel;
+}
+
+/* return the number of memory rank */
+unsigned rte_memory_get_nrank(void)
+{
+	return rte_eal_get_configuration()->mem_config->nrank;
+}
+
+static int
+rte_eal_memdevice_init(void)
+{
+	struct rte_config *config;
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		return 0;
+
+	config = rte_eal_get_configuration();
+	config->mem_config->nchannel = internal_config.force_nchannel;
+	config->mem_config->nrank = internal_config.force_nrank;
+
+	return 0;
+}
+
+/* init memory subsystem */
+int
+rte_eal_memory_init(void)
+{
+	RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
+
+	const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+			rte_eal_hugepage_init() :
+			rte_eal_hugepage_attach();
+	if (retval < 0)
+		return -1;
+
+	if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
+		return -1;
+
+	return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
new file mode 100644
index 00000000..711c8457
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -0,0 +1,445 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+
+#include "malloc_heap.h"
+#include "malloc_elem.h"
+#include "eal_private.h"
+
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+	const struct rte_mem_config *mcfg;
+	const struct rte_memzone *mz;
+	unsigned i = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/*
+	 * the algorithm is not optimal (linear), but there are few
+	 * zones and this function should be called at init only
+	 */
+	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+		mz = &mcfg->memzone[i];
+		if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+			return &mcfg->memzone[i];
+	}
+
+	return NULL;
+}
+
+static inline struct rte_memzone *
+get_next_free_memzone(void)
+{
+	struct rte_mem_config *mcfg;
+	unsigned i = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+		if (mcfg->memzone[i].addr == NULL)
+			return &mcfg->memzone[i];
+	}
+
+	return NULL;
+}
+
+/* This function will return the greatest free block if a heap has been
+ * specified. If no heap has been specified, it will return the heap and
+ * length of the greatest free block available in all heaps */
+static size_t
+find_heap_max_free_elem(int *s, unsigned align)
+{
+	struct rte_mem_config *mcfg;
+	struct rte_malloc_socket_stats stats;
+	int i, socket = *s;
+	size_t len = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		if ((socket != SOCKET_ID_ANY) && (socket != i))
+			continue;
+
+		malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
+		if (stats.greatest_free_size > len) {
+			len = stats.greatest_free_size;
+			*s = i;
+		}
+	}
+
+	return len - MALLOC_ELEM_OVERHEAD - align;
+}
+
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
+		int socket_id, unsigned flags, unsigned align, unsigned bound)
+{
+	struct rte_mem_config *mcfg;
+	size_t requested_len;
+	int socket, i;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* no more room in config */
+	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
+		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
+		rte_errno = ENOSPC;
+		return NULL;
+	}
+
+	/* zone already exist */
+	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
+		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
+			__func__, name);
+		rte_errno = EEXIST;
+		return NULL;
+	}
+
+	/* if alignment is not a power of two */
+	if (align && !rte_is_power_of_2(align)) {
+		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+				align);
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* alignment less than cache size is not allowed */
+	if (align < RTE_CACHE_LINE_SIZE)
+		align = RTE_CACHE_LINE_SIZE;
+
+	/* align length on cache boundary. Check for overflow before doing so */
+	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
+		rte_errno = EINVAL; /* requested size too big */
+		return NULL;
+	}
+
+	len += RTE_CACHE_LINE_MASK;
+	len &= ~((size_t) RTE_CACHE_LINE_MASK);
+
+	/* save minimal requested  length */
+	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE,  len);
+
+	/* check that boundary condition is valid */
+	if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	if (!rte_eal_has_hugepages())
+		socket_id = SOCKET_ID_ANY;
+
+	if (len == 0) {
+		if (bound != 0)
+			requested_len = bound;
+		else
+			requested_len = find_heap_max_free_elem(&socket_id, align);
+	}
+
+	if (socket_id == SOCKET_ID_ANY)
+		socket = malloc_get_numa_socket();
+	else
+		socket = socket_id;
+
+	/* allocate memory on heap */
+	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
+			requested_len, flags, align, bound);
+
+	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
+		/* try other heaps */
+		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+			if (socket == i)
+				continue;
+
+			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
+					NULL, requested_len, flags, align, bound);
+			if (mz_addr != NULL)
+				break;
+		}
+	}
+
+	if (mz_addr == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+
+	/* fill the zone in config */
+	struct rte_memzone *mz = get_next_free_memzone();
+
+	if (mz == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
+				"in config!\n", __func__);
+		rte_errno = ENOSPC;
+		return NULL;
+	}
+
+	mcfg->memzone_cnt++;
+	snprintf(mz->name, sizeof(mz->name), "%s", name);
+	mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+	mz->addr = mz_addr;
+	mz->len = (requested_len == 0 ? elem->size : requested_len);
+	mz->hugepage_sz = elem->ms->hugepage_sz;
+	mz->socket_id = elem->ms->socket_id;
+	mz->flags = 0;
+	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
+
+	return mz;
+}
+
+static const struct rte_memzone *
+rte_memzone_reserve_thread_safe(const char *name, size_t len,
+				int socket_id, unsigned flags, unsigned align,
+				unsigned bound)
+{
+	struct rte_mem_config *mcfg;
+	const struct rte_memzone *mz = NULL;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_write_lock(&mcfg->mlock);
+
+	mz = memzone_reserve_aligned_thread_unsafe(
+		name, len, socket_id, flags, align, bound);
+
+	rte_rwlock_write_unlock(&mcfg->mlock);
+
+	return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
+			    unsigned flags, unsigned align, unsigned bound)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+					       align, bound);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
+			    unsigned flags, unsigned align)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+					       align, 0);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor. If the
+ * allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve(const char *name, size_t len, int socket_id,
+		    unsigned flags)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id,
+					       flags, RTE_CACHE_LINE_SIZE, 0);
+}
+
+int
+rte_memzone_free(const struct rte_memzone *mz)
+{
+	struct rte_mem_config *mcfg;
+	int ret = 0;
+	void *addr;
+	unsigned idx;
+
+	if (mz == NULL)
+		return -EINVAL;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_write_lock(&mcfg->mlock);
+
+	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
+	idx = idx / sizeof(struct rte_memzone);
+
+	addr = mcfg->memzone[idx].addr;
+#ifdef RTE_LIBRTE_IVSHMEM
+	/*
+	 * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot
+	 * free it.
+	 */
+	if (mcfg->memzone[idx].ioremap_addr != 0)
+		ret = -EINVAL;
+#endif
+	if (addr == NULL)
+		ret = -EINVAL;
+	else if (mcfg->memzone_cnt == 0) {
+		rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n",
+				__func__);
+	} else {
+		memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
+		mcfg->memzone_cnt--;
+	}
+
+	rte_rwlock_write_unlock(&mcfg->mlock);
+
+	rte_free(addr);
+
+	return ret;
+}
+
+/*
+ * Lookup for the memzone identified by the given name
+ */
+const struct rte_memzone *
+rte_memzone_lookup(const char *name)
+{
+	struct rte_mem_config *mcfg;
+	const struct rte_memzone *memzone = NULL;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_read_lock(&mcfg->mlock);
+
+	memzone = memzone_lookup_thread_unsafe(name);
+
+	rte_rwlock_read_unlock(&mcfg->mlock);
+
+	return memzone;
+}
+
+/* Dump all reserved memory zones on console */
+void
+rte_memzone_dump(FILE *f)
+{
+	struct rte_mem_config *mcfg;
+	unsigned i = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_read_lock(&mcfg->mlock);
+	/* dump all zones */
+	for (i=0; i<RTE_MAX_MEMZONE; i++) {
+		if (mcfg->memzone[i].addr == NULL)
+			break;
+		fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
+		       ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
+		       mcfg->memzone[i].name,
+		       mcfg->memzone[i].phys_addr,
+		       mcfg->memzone[i].len,
+		       mcfg->memzone[i].addr,
+		       mcfg->memzone[i].socket_id,
+		       mcfg->memzone[i].flags);
+	}
+	rte_rwlock_read_unlock(&mcfg->mlock);
+}
+
+/*
+ * Init the memzone subsystem
+ */
+int
+rte_eal_memzone_init(void)
+{
+	struct rte_mem_config *mcfg;
+	const struct rte_memseg *memseg;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* secondary processes don't need to initialise anything */
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		return 0;
+
+	memseg = rte_eal_get_physmem_layout();
+	if (memseg == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
+		return -1;
+	}
+
+	rte_rwlock_write_lock(&mcfg->mlock);
+
+	/* delete all zones */
+	mcfg->memzone_cnt = 0;
+	memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
+
+	rte_rwlock_write_unlock(&mcfg->mlock);
+
+	return rte_eal_malloc_heap_init();
+}
+
+/* Walk all reserved memory zones */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
+		      void *arg)
+{
+	struct rte_mem_config *mcfg;
+	unsigned i;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_read_lock(&mcfg->mlock);
+	for (i=0; i<RTE_MAX_MEMZONE; i++) {
+		if (mcfg->memzone[i].addr != NULL)
+			(*func)(&mcfg->memzone[i], arg);
+	}
+	rte_rwlock_read_unlock(&mcfg->mlock);
+}
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
new file mode 100644
index 00000000..2b418d52
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -0,0 +1,1022 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <syslog.h>
+#include <ctype.h>
+#include <limits.h>
+#include <errno.h>
+#include <getopt.h>
+#include <dlfcn.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_lcore.h>
+#include <rte_version.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_options.h"
+#include "eal_filesystem.h"
+
+#define BITS_PER_HEX 4
+
+const char
+eal_short_options[] =
+	"b:" /* pci-blacklist */
+	"c:" /* coremask */
+	"d:" /* driver */
+	"h"  /* help */
+	"l:" /* corelist */
+	"m:" /* memory size */
+	"n:" /* memory channels */
+	"r:" /* memory ranks */
+	"v"  /* version */
+	"w:" /* pci-whitelist */
+	;
+
+const struct option
+eal_long_options[] = {
+	{OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
+	{OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
+	{OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
+	{OPT_HELP,              0, NULL, OPT_HELP_NUM             },
+	{OPT_HUGE_DIR,          1, NULL, OPT_HUGE_DIR_NUM         },
+	{OPT_HUGE_UNLINK,       0, NULL, OPT_HUGE_UNLINK_NUM      },
+	{OPT_LCORES,            1, NULL, OPT_LCORES_NUM           },
+	{OPT_LOG_LEVEL,         1, NULL, OPT_LOG_LEVEL_NUM        },
+	{OPT_MASTER_LCORE,      1, NULL, OPT_MASTER_LCORE_NUM     },
+	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
+	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
+	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
+	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
+	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
+	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
+	{OPT_PROC_TYPE,         1, NULL, OPT_PROC_TYPE_NUM        },
+	{OPT_SOCKET_MEM,        1, NULL, OPT_SOCKET_MEM_NUM       },
+	{OPT_SYSLOG,            1, NULL, OPT_SYSLOG_NUM           },
+	{OPT_VDEV,              1, NULL, OPT_VDEV_NUM             },
+	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
+	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
+	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
+	{0,                     0, NULL, 0                        }
+};
+
+TAILQ_HEAD(shared_driver_list, shared_driver);
+
+/* Definition for shared object drivers. */
+struct shared_driver {
+	TAILQ_ENTRY(shared_driver) next;
+
+	char    name[PATH_MAX];
+	void*   lib_handle;
+};
+
+/* List of external loadable drivers */
+static struct shared_driver_list solib_list =
+TAILQ_HEAD_INITIALIZER(solib_list);
+
+/* Default path of external loadable drivers */
+static const char *default_solib_dir = RTE_EAL_PMD_PATH;
+
+static int master_lcore_parsed;
+static int mem_parsed;
+
+void
+eal_reset_internal_config(struct internal_config *internal_cfg)
+{
+	int i;
+
+	internal_cfg->memory = 0;
+	internal_cfg->force_nrank = 0;
+	internal_cfg->force_nchannel = 0;
+	internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
+	internal_cfg->hugepage_dir = NULL;
+	internal_cfg->force_sockets = 0;
+	/* zero out the NUMA config */
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+		internal_cfg->socket_mem[i] = 0;
+	/* zero out hugedir descriptors */
+	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+		internal_cfg->hugepage_info[i].lock_descriptor = -1;
+	internal_cfg->base_virtaddr = 0;
+
+	internal_cfg->syslog_facility = LOG_DAEMON;
+	/* default value from build option */
+	internal_cfg->log_level = RTE_LOG_LEVEL;
+
+	internal_cfg->xen_dom0_support = 0;
+
+	/* if set to NONE, interrupt mode is determined automatically */
+	internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
+
+#ifdef RTE_LIBEAL_USE_HPET
+	internal_cfg->no_hpet = 0;
+#else
+	internal_cfg->no_hpet = 1;
+#endif
+	internal_cfg->vmware_tsc_map = 0;
+	internal_cfg->create_uio_dev = 0;
+}
+
+static int
+eal_plugin_add(const char *path)
+{
+	struct shared_driver *solib;
+
+	solib = malloc(sizeof(*solib));
+	if (solib == NULL) {
+		RTE_LOG(ERR, EAL, "malloc(solib) failed\n");
+		return -1;
+	}
+	memset(solib, 0, sizeof(*solib));
+	strncpy(solib->name, path, PATH_MAX-1);
+	solib->name[PATH_MAX-1] = 0;
+	TAILQ_INSERT_TAIL(&solib_list, solib, next);
+
+	return 0;
+}
+
+static int
+eal_plugindir_init(const char *path)
+{
+	DIR *d = NULL;
+	struct dirent *dent = NULL;
+	char sopath[PATH_MAX];
+
+	if (path == NULL || *path == '\0')
+		return 0;
+
+	d = opendir(path);
+	if (d == NULL) {
+		RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n",
+			path, strerror(errno));
+		return -1;
+	}
+
+	while ((dent = readdir(d)) != NULL) {
+		struct stat sb;
+
+		snprintf(sopath, PATH_MAX-1, "%s/%s", path, dent->d_name);
+		sopath[PATH_MAX-1] = 0;
+
+		if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode)))
+			continue;
+
+		if (eal_plugin_add(sopath) == -1)
+			break;
+	}
+
+	closedir(d);
+	/* XXX this ignores failures from readdir() itself */
+	return (dent == NULL) ? 0 : -1;
+}
+
+int
+eal_plugins_init(void)
+{
+	struct shared_driver *solib = NULL;
+
+	if (*default_solib_dir != '\0')
+		eal_plugin_add(default_solib_dir);
+
+	TAILQ_FOREACH(solib, &solib_list, next) {
+		struct stat sb;
+
+		if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) {
+			if (eal_plugindir_init(solib->name) == -1) {
+				RTE_LOG(ERR, EAL,
+					"Cannot init plugin directory %s\n",
+					solib->name);
+				return -1;
+			}
+		} else {
+			RTE_LOG(DEBUG, EAL, "open shared lib %s\n",
+				solib->name);
+			solib->lib_handle = dlopen(solib->name, RTLD_NOW);
+			if (solib->lib_handle == NULL) {
+				RTE_LOG(ERR, EAL, "%s\n", dlerror());
+				return -1;
+			}
+		}
+
+	}
+	return 0;
+}
+
+/*
+ * Parse the coremask given as argument (hexadecimal string) and fill
+ * the global configuration (core role and core count) with the parsed
+ * value.
+ */
+static int xdigit2val(unsigned char c)
+{
+	int val;
+
+	if (isdigit(c))
+		val = c - '0';
+	else if (isupper(c))
+		val = c - 'A' + 10;
+	else
+		val = c - 'a' + 10;
+	return val;
+}
+
+static int
+eal_parse_coremask(const char *coremask)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	int i, j, idx = 0;
+	unsigned count = 0;
+	char c;
+	int val;
+
+	if (coremask == NULL)
+		return -1;
+	/* Remove all blank characters ahead and after .
+	 * Remove 0x/0X if exists.
+	 */
+	while (isblank(*coremask))
+		coremask++;
+	if (coremask[0] == '0' && ((coremask[1] == 'x')
+		|| (coremask[1] == 'X')))
+		coremask += 2;
+	i = strlen(coremask);
+	while ((i > 0) && isblank(coremask[i - 1]))
+		i--;
+	if (i == 0)
+		return -1;
+
+	for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+		c = coremask[i];
+		if (isxdigit(c) == 0) {
+			/* invalid characters */
+			return -1;
+		}
+		val = xdigit2val(c);
+		for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
+		{
+			if ((1 << j) & val) {
+				if (!lcore_config[idx].detected) {
+					RTE_LOG(ERR, EAL, "lcore %u "
+					        "unavailable\n", idx);
+					return -1;
+				}
+				cfg->lcore_role[idx] = ROLE_RTE;
+				lcore_config[idx].core_index = count;
+				count++;
+			} else {
+				cfg->lcore_role[idx] = ROLE_OFF;
+				lcore_config[idx].core_index = -1;
+			}
+		}
+	}
+	for (; i >= 0; i--)
+		if (coremask[i] != '0')
+			return -1;
+	for (; idx < RTE_MAX_LCORE; idx++) {
+		cfg->lcore_role[idx] = ROLE_OFF;
+		lcore_config[idx].core_index = -1;
+	}
+	if (count == 0)
+		return -1;
+	/* Update the count of enabled logical cores of the EAL configuration */
+	cfg->lcore_count = count;
+	return 0;
+}
+
+static int
+eal_parse_corelist(const char *corelist)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	int i, idx = 0;
+	unsigned count = 0;
+	char *end = NULL;
+	int min, max;
+
+	if (corelist == NULL)
+		return -1;
+
+	/* Remove all blank characters ahead and after */
+	while (isblank(*corelist))
+		corelist++;
+	i = strlen(corelist);
+	while ((i > 0) && isblank(corelist[i - 1]))
+		i--;
+
+	/* Reset config */
+	for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+		cfg->lcore_role[idx] = ROLE_OFF;
+		lcore_config[idx].core_index = -1;
+	}
+
+	/* Get list of cores */
+	min = RTE_MAX_LCORE;
+	do {
+		while (isblank(*corelist))
+			corelist++;
+		if (*corelist == '\0')
+			return -1;
+		errno = 0;
+		idx = strtoul(corelist, &end, 10);
+		if (errno || end == NULL)
+			return -1;
+		while (isblank(*end))
+			end++;
+		if (*end == '-') {
+			min = idx;
+		} else if ((*end == ',') || (*end == '\0')) {
+			max = idx;
+			if (min == RTE_MAX_LCORE)
+				min = idx;
+			for (idx = min; idx <= max; idx++) {
+				if (cfg->lcore_role[idx] != ROLE_RTE) {
+					cfg->lcore_role[idx] = ROLE_RTE;
+					lcore_config[idx].core_index = count;
+					count++;
+				}
+			}
+			min = RTE_MAX_LCORE;
+		} else
+			return -1;
+		corelist = end + 1;
+	} while (*end != '\0');
+
+	if (count == 0)
+		return -1;
+
+	/* Update the count of enabled logical cores of the EAL configuration */
+	cfg->lcore_count = count;
+
+	return 0;
+}
+
+/* Changes the lcore id of the master thread */
+static int
+eal_parse_master_lcore(const char *arg)
+{
+	char *parsing_end;
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	errno = 0;
+	cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0);
+	if (errno || parsing_end[0] != 0)
+		return -1;
+	if (cfg->master_lcore >= RTE_MAX_LCORE)
+		return -1;
+	master_lcore_parsed = 1;
+	return 0;
+}
+
+/*
+ * Parse elem, the elem could be single number/range or '(' ')' group
+ * 1) A single number elem, it's just a simple digit. e.g. 9
+ * 2) A single range elem, two digits with a '-' between. e.g. 2-6
+ * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6)
+ *    Within group elem, '-' used for a range separator;
+ *                       ',' used for a single number.
+ */
+static int
+eal_parse_set(const char *input, uint16_t set[], unsigned num)
+{
+	unsigned idx;
+	const char *str = input;
+	char *end = NULL;
+	unsigned min, max;
+
+	memset(set, 0, num * sizeof(uint16_t));
+
+	while (isblank(*str))
+		str++;
+
+	/* only digit or left bracket is qualify for start point */
+	if ((!isdigit(*str) && *str != '(') || *str == '\0')
+		return -1;
+
+	/* process single number or single range of number */
+	if (*str != '(') {
+		errno = 0;
+		idx = strtoul(str, &end, 10);
+		if (errno || end == NULL || idx >= num)
+			return -1;
+		else {
+			while (isblank(*end))
+				end++;
+
+			min = idx;
+			max = idx;
+			if (*end == '-') {
+				/* process single <number>-<number> */
+				end++;
+				while (isblank(*end))
+					end++;
+				if (!isdigit(*end))
+					return -1;
+
+				errno = 0;
+				idx = strtoul(end, &end, 10);
+				if (errno || end == NULL || idx >= num)
+					return -1;
+				max = idx;
+				while (isblank(*end))
+					end++;
+				if (*end != ',' && *end != '\0')
+					return -1;
+			}
+
+			if (*end != ',' && *end != '\0' &&
+			    *end != '@')
+				return -1;
+
+			for (idx = RTE_MIN(min, max);
+			     idx <= RTE_MAX(min, max); idx++)
+				set[idx] = 1;
+
+			return end - input;
+		}
+	}
+
+	/* process set within bracket */
+	str++;
+	while (isblank(*str))
+		str++;
+	if (*str == '\0')
+		return -1;
+
+	min = RTE_MAX_LCORE;
+	do {
+
+		/* go ahead to the first digit */
+		while (isblank(*str))
+			str++;
+		if (!isdigit(*str))
+			return -1;
+
+		/* get the digit value */
+		errno = 0;
+		idx = strtoul(str, &end, 10);
+		if (errno || end == NULL || idx >= num)
+			return -1;
+
+		/* go ahead to separator '-',',' and ')' */
+		while (isblank(*end))
+			end++;
+		if (*end == '-') {
+			if (min == RTE_MAX_LCORE)
+				min = idx;
+			else /* avoid continuous '-' */
+				return -1;
+		} else if ((*end == ',') || (*end == ')')) {
+			max = idx;
+			if (min == RTE_MAX_LCORE)
+				min = idx;
+			for (idx = RTE_MIN(min, max);
+			     idx <= RTE_MAX(min, max); idx++)
+				set[idx] = 1;
+
+			min = RTE_MAX_LCORE;
+		} else
+			return -1;
+
+		str = end + 1;
+	} while (*end != '\0' && *end != ')');
+
+	return str - input;
+}
+
+/* convert from set array to cpuset bitmap */
+static int
+convert_to_cpuset(rte_cpuset_t *cpusetp,
+	      uint16_t *set, unsigned num)
+{
+	unsigned idx;
+
+	CPU_ZERO(cpusetp);
+
+	for (idx = 0; idx < num; idx++) {
+		if (!set[idx])
+			continue;
+
+		if (!lcore_config[idx].detected) {
+			RTE_LOG(ERR, EAL, "core %u "
+				"unavailable\n", idx);
+			return -1;
+		}
+
+		CPU_SET(idx, cpusetp);
+	}
+
+	return 0;
+}
+
+/*
+ * The format pattern: --lcores='<lcores[@cpus]>[<,lcores[@cpus]>...]'
+ * lcores, cpus could be a single digit/range or a group.
+ * '(' and ')' are necessary if it's a group.
+ * If not supply '@cpus', the value of cpus uses the same as lcores.
+ * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below
+ *   lcore 0 runs on cpuset 0x41 (cpu 0,6)
+ *   lcore 1 runs on cpuset 0x2 (cpu 1)
+ *   lcore 2 runs on cpuset 0xe0 (cpu 5,6,7)
+ *   lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2)
+ *   lcore 6 runs on cpuset 0x41 (cpu 0,6)
+ *   lcore 7 runs on cpuset 0x80 (cpu 7)
+ *   lcore 8 runs on cpuset 0x100 (cpu 8)
+ */
+static int
+eal_parse_lcores(const char *lcores)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	static uint16_t set[RTE_MAX_LCORE];
+	unsigned idx = 0;
+	int i;
+	unsigned count = 0;
+	const char *lcore_start = NULL;
+	const char *end = NULL;
+	int offset;
+	rte_cpuset_t cpuset;
+	int lflags = 0;
+	int ret = -1;
+
+	if (lcores == NULL)
+		return -1;
+
+	/* Remove all blank characters ahead and after */
+	while (isblank(*lcores))
+		lcores++;
+	i = strlen(lcores);
+	while ((i > 0) && isblank(lcores[i - 1]))
+		i--;
+
+	CPU_ZERO(&cpuset);
+
+	/* Reset lcore config */
+	for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+		cfg->lcore_role[idx] = ROLE_OFF;
+		lcore_config[idx].core_index = -1;
+		CPU_ZERO(&lcore_config[idx].cpuset);
+	}
+
+	/* Get list of cores */
+	do {
+		while (isblank(*lcores))
+			lcores++;
+		if (*lcores == '\0')
+			goto err;
+
+		/* record lcore_set start point */
+		lcore_start = lcores;
+
+		/* go across a complete bracket */
+		if (*lcore_start == '(') {
+			lcores += strcspn(lcores, ")");
+			if (*lcores++ == '\0')
+				goto err;
+		}
+
+		/* scan the separator '@', ','(next) or '\0'(finish) */
+		lcores += strcspn(lcores, "@,");
+
+		if (*lcores == '@') {
+			/* explicit assign cpu_set */
+			offset = eal_parse_set(lcores + 1, set, RTE_DIM(set));
+			if (offset < 0)
+				goto err;
+
+			/* prepare cpu_set and update the end cursor */
+			if (0 > convert_to_cpuset(&cpuset,
+						  set, RTE_DIM(set)))
+				goto err;
+			end = lcores + 1 + offset;
+		} else { /* ',' or '\0' */
+			/* haven't given cpu_set, current loop done */
+			end = lcores;
+
+			/* go back to check <number>-<number> */
+			offset = strcspn(lcore_start, "(-");
+			if (offset < (end - lcore_start) &&
+			    *(lcore_start + offset) != '(')
+				lflags = 1;
+		}
+
+		if (*end != ',' && *end != '\0')
+			goto err;
+
+		/* parse lcore_set from start point */
+		if (0 > eal_parse_set(lcore_start, set, RTE_DIM(set)))
+			goto err;
+
+		/* without '@', by default using lcore_set as cpu_set */
+		if (*lcores != '@' &&
+		    0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set)))
+			goto err;
+
+		/* start to update lcore_set */
+		for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+			if (!set[idx])
+				continue;
+
+			if (cfg->lcore_role[idx] != ROLE_RTE) {
+				lcore_config[idx].core_index = count;
+				cfg->lcore_role[idx] = ROLE_RTE;
+				count++;
+			}
+
+			if (lflags) {
+				CPU_ZERO(&cpuset);
+				CPU_SET(idx, &cpuset);
+			}
+			rte_memcpy(&lcore_config[idx].cpuset, &cpuset,
+				   sizeof(rte_cpuset_t));
+		}
+
+		lcores = end + 1;
+	} while (*end != '\0');
+
+	if (count == 0)
+		goto err;
+
+	cfg->lcore_count = count;
+	ret = 0;
+
+err:
+
+	return ret;
+}
+
+static int
+eal_parse_syslog(const char *facility, struct internal_config *conf)
+{
+	int i;
+	static struct {
+		const char *name;
+		int value;
+	} map[] = {
+		{ "auth", LOG_AUTH },
+		{ "cron", LOG_CRON },
+		{ "daemon", LOG_DAEMON },
+		{ "ftp", LOG_FTP },
+		{ "kern", LOG_KERN },
+		{ "lpr", LOG_LPR },
+		{ "mail", LOG_MAIL },
+		{ "news", LOG_NEWS },
+		{ "syslog", LOG_SYSLOG },
+		{ "user", LOG_USER },
+		{ "uucp", LOG_UUCP },
+		{ "local0", LOG_LOCAL0 },
+		{ "local1", LOG_LOCAL1 },
+		{ "local2", LOG_LOCAL2 },
+		{ "local3", LOG_LOCAL3 },
+		{ "local4", LOG_LOCAL4 },
+		{ "local5", LOG_LOCAL5 },
+		{ "local6", LOG_LOCAL6 },
+		{ "local7", LOG_LOCAL7 },
+		{ NULL, 0 }
+	};
+
+	for (i = 0; map[i].name; i++) {
+		if (!strcmp(facility, map[i].name)) {
+			conf->syslog_facility = map[i].value;
+			return 0;
+		}
+	}
+	return -1;
+}
+
+static int
+eal_parse_log_level(const char *level, uint32_t *log_level)
+{
+	char *end;
+	unsigned long tmp;
+
+	errno = 0;
+	tmp = strtoul(level, &end, 0);
+
+	/* check for errors */
+	if ((errno != 0) || (level[0] == '\0') ||
+	    end == NULL || (*end != '\0'))
+		return -1;
+
+	/* log_level is a uint32_t */
+	if (tmp >= UINT32_MAX)
+		return -1;
+
+	*log_level = tmp;
+	return 0;
+}
+
+static enum rte_proc_type_t
+eal_parse_proc_type(const char *arg)
+{
+	if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
+		return RTE_PROC_PRIMARY;
+	if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
+		return RTE_PROC_SECONDARY;
+	if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
+		return RTE_PROC_AUTO;
+
+	return RTE_PROC_INVALID;
+}
+
+int
+eal_parse_common_option(int opt, const char *optarg,
+			struct internal_config *conf)
+{
+	switch (opt) {
+	/* blacklist */
+	case 'b':
+		if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI,
+				optarg) < 0) {
+			return -1;
+		}
+		break;
+	/* whitelist */
+	case 'w':
+		if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI,
+				optarg) < 0) {
+			return -1;
+		}
+		break;
+	/* coremask */
+	case 'c':
+		if (eal_parse_coremask(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid coremask\n");
+			return -1;
+		}
+		break;
+	/* corelist */
+	case 'l':
+		if (eal_parse_corelist(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid core list\n");
+			return -1;
+		}
+		break;
+	/* size of memory */
+	case 'm':
+		conf->memory = atoi(optarg);
+		conf->memory *= 1024ULL;
+		conf->memory *= 1024ULL;
+		mem_parsed = 1;
+		break;
+	/* force number of channels */
+	case 'n':
+		conf->force_nchannel = atoi(optarg);
+		if (conf->force_nchannel == 0) {
+			RTE_LOG(ERR, EAL, "invalid channel number\n");
+			return -1;
+		}
+		break;
+	/* force number of ranks */
+	case 'r':
+		conf->force_nrank = atoi(optarg);
+		if (conf->force_nrank == 0 ||
+		    conf->force_nrank > 16) {
+			RTE_LOG(ERR, EAL, "invalid rank number\n");
+			return -1;
+		}
+		break;
+	/* force loading of external driver */
+	case 'd':
+		if (eal_plugin_add(optarg) == -1)
+			return -1;
+		break;
+	case 'v':
+		/* since message is explicitly requested by user, we
+		 * write message at highest log level so it can always
+		 * be seen
+		 * even if info or warning messages are disabled */
+		RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
+		break;
+
+	/* long options */
+	case OPT_HUGE_UNLINK_NUM:
+		conf->hugepage_unlink = 1;
+		break;
+
+	case OPT_NO_HUGE_NUM:
+		conf->no_hugetlbfs = 1;
+		break;
+
+	case OPT_NO_PCI_NUM:
+		conf->no_pci = 1;
+		break;
+
+	case OPT_NO_HPET_NUM:
+		conf->no_hpet = 1;
+		break;
+
+	case OPT_VMWARE_TSC_MAP_NUM:
+		conf->vmware_tsc_map = 1;
+		break;
+
+	case OPT_NO_SHCONF_NUM:
+		conf->no_shconf = 1;
+		break;
+
+	case OPT_PROC_TYPE_NUM:
+		conf->process_type = eal_parse_proc_type(optarg);
+		break;
+
+	case OPT_MASTER_LCORE_NUM:
+		if (eal_parse_master_lcore(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_MASTER_LCORE "\n");
+			return -1;
+		}
+		break;
+
+	case OPT_VDEV_NUM:
+		if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL,
+				optarg) < 0) {
+			return -1;
+		}
+		break;
+
+	case OPT_SYSLOG_NUM:
+		if (eal_parse_syslog(optarg, conf) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameters for --"
+					OPT_SYSLOG "\n");
+			return -1;
+		}
+		break;
+
+	case OPT_LOG_LEVEL_NUM: {
+		uint32_t log;
+
+		if (eal_parse_log_level(optarg, &log) < 0) {
+			RTE_LOG(ERR, EAL,
+				"invalid parameters for --"
+				OPT_LOG_LEVEL "\n");
+			return -1;
+		}
+		conf->log_level = log;
+		break;
+	}
+	case OPT_LCORES_NUM:
+		if (eal_parse_lcores(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+				OPT_LCORES "\n");
+			return -1;
+		}
+		break;
+
+	/* don't know what to do, leave this to caller */
+	default:
+		return 1;
+
+	}
+
+	return 0;
+}
+
+int
+eal_adjust_config(struct internal_config *internal_cfg)
+{
+	int i;
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	if (internal_config.process_type == RTE_PROC_AUTO)
+		internal_config.process_type = eal_proc_type_detect();
+
+	/* default master lcore is the first one */
+	if (!master_lcore_parsed)
+		cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+
+	/* if no memory amounts were requested, this will result in 0 and
+	 * will be overridden later, right after eal_hugepage_info_init() */
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+		internal_cfg->memory += internal_cfg->socket_mem[i];
+
+	return 0;
+}
+
+int
+eal_check_common_options(struct internal_config *internal_cfg)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+
+	if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) {
+		RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n");
+		return -1;
+	}
+
+	if (internal_cfg->process_type == RTE_PROC_INVALID) {
+		RTE_LOG(ERR, EAL, "Invalid process type specified\n");
+		return -1;
+	}
+	if (index(internal_cfg->hugefile_prefix, '%') != NULL) {
+		RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" "
+			"option\n");
+		return -1;
+	}
+	if (mem_parsed && internal_cfg->force_sockets == 1) {
+		RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot "
+			"be specified at the same time\n");
+		return -1;
+	}
+	if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot "
+			"be specified together with --"OPT_NO_HUGE"\n");
+		return -1;
+	}
+
+	if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
+			"be specified together with --"OPT_NO_HUGE"\n");
+		return -1;
+	}
+
+	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 &&
+		rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) {
+		RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
+			"cannot be used at the same time\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void
+eal_common_usage(void)
+{
+	printf("[options]\n\n"
+	       "EAL common options:\n"
+	       "  -c COREMASK         Hexadecimal bitmask of cores to run on\n"
+	       "  -l CORELIST         List of cores to run on\n"
+	       "                      The argument format is <c1>[-c2][,c3[-c4],...]\n"
+	       "                      where c1, c2, etc are core indexes between 0 and %d\n"
+	       "  --"OPT_LCORES" COREMAP    Map lcore set to physical cpu set\n"
+	       "                      The argument format is\n"
+	       "                            '<lcores[@cpus]>[<,lcores[@cpus]>...]'\n"
+	       "                      lcores and cpus list are grouped by '(' and ')'\n"
+	       "                      Within the group, '-' is used for range separator,\n"
+	       "                      ',' is used for single number separator.\n"
+	       "                      '( )' can be omitted for single element group,\n"
+	       "                      '@' can be omitted if cpus and lcores have the same value\n"
+	       "  --"OPT_MASTER_LCORE" ID   Core ID that is used as master\n"
+	       "  -n CHANNELS         Number of memory channels\n"
+	       "  -m MB               Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
+	       "  -r RANKS            Force number of memory ranks (don't detect)\n"
+	       "  -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
+	       "                      Prevent EAL from using this PCI device. The argument\n"
+	       "                      format is <domain:bus:devid.func>.\n"
+	       "  -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n"
+	       "                      Only use the specified PCI devices. The argument format\n"
+	       "                      is <[domain:]bus:devid.func>. This option can be present\n"
+	       "                      several times (once per device).\n"
+	       "                      [NOTE: PCI whitelist cannot be used with -b option]\n"
+	       "  --"OPT_VDEV"              Add a virtual device.\n"
+	       "                      The argument format is <driver><id>[,key=val,...]\n"
+	       "                      (ex: --vdev=eth_pcap0,iface=eth2).\n"
+	       "  -d LIB.so|DIR       Add a driver or driver directory\n"
+	       "                      (can be used multiple times)\n"
+	       "  --"OPT_VMWARE_TSC_MAP"    Use VMware TSC map instead of native RDTSC\n"
+	       "  --"OPT_PROC_TYPE"         Type of this process (primary|secondary|auto)\n"
+	       "  --"OPT_SYSLOG"            Set syslog facility\n"
+	       "  --"OPT_LOG_LEVEL"         Set default log level\n"
+	       "  -v                  Display version information on startup\n"
+	       "  -h, --help          This help\n"
+	       "\nEAL options for DEBUG use only:\n"
+	       "  --"OPT_HUGE_UNLINK"       Unlink hugepage files after init\n"
+	       "  --"OPT_NO_HUGE"           Use malloc instead of hugetlbfs\n"
+	       "  --"OPT_NO_PCI"            Disable PCI\n"
+	       "  --"OPT_NO_HPET"           Disable HPET\n"
+	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+	       "\n", RTE_MAX_LCORE);
+}
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
new file mode 100644
index 00000000..40f49229
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -0,0 +1,457 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*   BSD LICENSE
+ *
+ *   Copyright 2013-2014 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_per_lcore.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_devargs.h>
+
+#include "eal_private.h"
+
+struct pci_driver_list pci_driver_list;
+struct pci_device_list pci_device_list;
+
+static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
+{
+	struct rte_devargs *devargs;
+
+	TAILQ_FOREACH(devargs, &devargs_list, next) {
+		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
+			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
+			continue;
+		if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
+			return devargs;
+	}
+	return NULL;
+}
+
+/* map a particular resource from a file */
+void *
+pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+		 int additional_flags)
+{
+	void *mapaddr;
+
+	/* Map the PCI memory resource of device */
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | additional_flags, fd, offset);
+	if (mapaddr == MAP_FAILED) {
+		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
+			__func__, fd, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno), mapaddr);
+	} else
+		RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+}
+
+/* unmap a particular resource */
+void
+pci_unmap_resource(void *requested_addr, size_t size)
+{
+	if (requested_addr == NULL)
+		return;
+
+	/* Unmap the PCI memory resource of device */
+	if (munmap(requested_addr, size)) {
+		RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+			__func__, requested_addr, (unsigned long)size,
+			strerror(errno));
+	} else
+		RTE_LOG(DEBUG, EAL, "  PCI memory unmapped at %p\n",
+				requested_addr);
+}
+
+/*
+ * If vendor/device ID match, call the devinit() function of the
+ * driver.
+ */
+static int
+rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev)
+{
+	int ret;
+	const struct rte_pci_id *id_table;
+
+	for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {
+
+		/* check if device's identifiers match the driver's ones */
+		if (id_table->vendor_id != dev->id.vendor_id &&
+				id_table->vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->device_id != dev->id.device_id &&
+				id_table->device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id &&
+				id_table->subsystem_vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
+				id_table->subsystem_device_id != PCI_ANY_ID)
+			continue;
+
+		struct rte_pci_addr *loc = &dev->addr;
+
+		RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
+				loc->domain, loc->bus, loc->devid, loc->function,
+				dev->numa_node);
+
+		RTE_LOG(DEBUG, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
+				dev->id.device_id, dr->name);
+
+		/* no initialization when blacklisted, return without error */
+		if (dev->devargs != NULL &&
+			dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) {
+			RTE_LOG(DEBUG, EAL, "  Device is blacklisted, not initializing\n");
+			return 1;
+		}
+
+		if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
+			/* map resources for devices that use igb_uio */
+			ret = rte_eal_pci_map_device(dev);
+			if (ret != 0)
+				return ret;
+		} else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND &&
+				rte_eal_process_type() == RTE_PROC_PRIMARY) {
+			/* unbind current driver */
+			if (pci_unbind_kernel_driver(dev) < 0)
+				return -1;
+		}
+
+		/* reference driver structure */
+		dev->driver = dr;
+
+		/* call the driver devinit() function */
+		return dr->devinit(dr, dev);
+	}
+	/* return positive value if driver doesn't support this device */
+	return 1;
+}
+
+/*
+ * If vendor/device ID match, call the devuninit() function of the
+ * driver.
+ */
+static int
+rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
+		struct rte_pci_device *dev)
+{
+	const struct rte_pci_id *id_table;
+
+	if ((dr == NULL) || (dev == NULL))
+		return -EINVAL;
+
+	for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {
+
+		/* check if device's identifiers match the driver's ones */
+		if (id_table->vendor_id != dev->id.vendor_id &&
+				id_table->vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->device_id != dev->id.device_id &&
+				id_table->device_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id &&
+				id_table->subsystem_vendor_id != PCI_ANY_ID)
+			continue;
+		if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
+				id_table->subsystem_device_id != PCI_ANY_ID)
+			continue;
+
+		struct rte_pci_addr *loc = &dev->addr;
+
+		RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
+				loc->domain, loc->bus, loc->devid,
+				loc->function, dev->numa_node);
+
+		RTE_LOG(DEBUG, EAL, "  remove driver: %x:%x %s\n", dev->id.vendor_id,
+				dev->id.device_id, dr->name);
+
+		if (dr->devuninit && (dr->devuninit(dev) < 0))
+			return -1;	/* negative value is an error */
+
+		/* clear driver structure */
+		dev->driver = NULL;
+
+		if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
+			/* unmap resources for devices that use igb_uio */
+			rte_eal_pci_unmap_device(dev);
+
+		return 0;
+	}
+
+	/* return positive value if driver doesn't support this device */
+	return 1;
+}
+
+/*
+ * If vendor/device ID match, call the devinit() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+pci_probe_all_drivers(struct rte_pci_device *dev)
+{
+	struct rte_pci_driver *dr = NULL;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dr, &pci_driver_list, next) {
+		rc = rte_eal_pci_probe_one_driver(dr, dev);
+		if (rc < 0)
+			/* negative value is an error */
+			return -1;
+		if (rc > 0)
+			/* positive value means driver doesn't support it */
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * If vendor/device ID match, call the devuninit() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+pci_detach_all_drivers(struct rte_pci_device *dev)
+{
+	struct rte_pci_driver *dr = NULL;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dr, &pci_driver_list, next) {
+		rc = rte_eal_pci_detach_dev(dr, dev);
+		if (rc < 0)
+			/* negative value is an error */
+			return -1;
+		if (rc > 0)
+			/* positive value means driver doesn't support it */
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Find the pci device specified by pci address, then invoke probe function of
+ * the driver of the devive.
+ */
+int
+rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
+{
+	struct rte_pci_device *dev = NULL;
+	int ret = 0;
+
+	if (addr == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dev, &pci_device_list, next) {
+		if (rte_eal_compare_pci_addr(&dev->addr, addr))
+			continue;
+
+		ret = pci_probe_all_drivers(dev);
+		if (ret < 0)
+			goto err_return;
+		return 0;
+	}
+	return -1;
+
+err_return:
+	RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
+			" cannot be used\n", dev->addr.domain, dev->addr.bus,
+			dev->addr.devid, dev->addr.function);
+	return -1;
+}
+
+/*
+ * Detach device specified by its pci address.
+ */
+int
+rte_eal_pci_detach(const struct rte_pci_addr *addr)
+{
+	struct rte_pci_device *dev = NULL;
+	int ret = 0;
+
+	if (addr == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dev, &pci_device_list, next) {
+		if (rte_eal_compare_pci_addr(&dev->addr, addr))
+			continue;
+
+		ret = pci_detach_all_drivers(dev);
+		if (ret < 0)
+			goto err_return;
+
+		TAILQ_REMOVE(&pci_device_list, dev, next);
+		return 0;
+	}
+	return -1;
+
+err_return:
+	RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
+			" cannot be used\n", dev->addr.domain, dev->addr.bus,
+			dev->addr.devid, dev->addr.function);
+	return -1;
+}
+
+/*
+ * Scan the content of the PCI bus, and call the devinit() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ */
+int
+rte_eal_pci_probe(void)
+{
+	struct rte_pci_device *dev = NULL;
+	struct rte_devargs *devargs;
+	int probe_all = 0;
+	int ret = 0;
+
+	if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0)
+		probe_all = 1;
+
+	TAILQ_FOREACH(dev, &pci_device_list, next) {
+
+		/* set devargs in PCI structure */
+		devargs = pci_devargs_lookup(dev);
+		if (devargs != NULL)
+			dev->devargs = devargs;
+
+		/* probe all or only whitelisted devices */
+		if (probe_all)
+			ret = pci_probe_all_drivers(dev);
+		else if (devargs != NULL &&
+			devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
+			ret = pci_probe_all_drivers(dev);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT
+				 " cannot be used\n", dev->addr.domain, dev->addr.bus,
+				 dev->addr.devid, dev->addr.function);
+	}
+
+	return 0;
+}
+
+/* dump one device */
+static int
+pci_dump_one_device(FILE *f, struct rte_pci_device *dev)
+{
+	int i;
+
+	fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
+	       dev->addr.devid, dev->addr.function);
+	fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id,
+	       dev->id.device_id);
+
+	for (i = 0; i != sizeof(dev->mem_resource) /
+		sizeof(dev->mem_resource[0]); i++) {
+		fprintf(f, "   %16.16"PRIx64" %16.16"PRIx64"\n",
+			dev->mem_resource[i].phys_addr,
+			dev->mem_resource[i].len);
+	}
+	return 0;
+}
+
+/* dump devices on the bus */
+void
+rte_eal_pci_dump(FILE *f)
+{
+	struct rte_pci_device *dev = NULL;
+
+	TAILQ_FOREACH(dev, &pci_device_list, next) {
+		pci_dump_one_device(f, dev);
+	}
+}
+
+/* register a driver */
+void
+rte_eal_pci_register(struct rte_pci_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&pci_driver_list, driver, next);
+}
+
+/* unregister a driver */
+void
+rte_eal_pci_unregister(struct rte_pci_driver *driver)
+{
+	TAILQ_REMOVE(&pci_driver_list, driver, next);
+}
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
new file mode 100644
index 00000000..f062e81d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_pci_uio.c
@@ -0,0 +1,222 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include "eal_private.h"
+
+static struct rte_tailq_elem rte_uio_tailq = {
+	.name = "UIO_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(rte_uio_tailq)
+
+static int
+pci_uio_map_secondary(struct rte_pci_device *dev)
+{
+	int fd, i;
+	struct mapped_pci_resource *uio_res;
+	struct mapped_pci_res_list *uio_res_list =
+			RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+	TAILQ_FOREACH(uio_res, uio_res_list, next) {
+
+		/* skip this element if it doesn't match our PCI address */
+		if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
+			continue;
+
+		for (i = 0; i != uio_res->nb_maps; i++) {
+			/*
+			 * open devname, to mmap it
+			 */
+			fd = open(uio_res->maps[i].path, O_RDWR);
+			if (fd < 0) {
+				RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+					uio_res->maps[i].path, strerror(errno));
+				return -1;
+			}
+
+			void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
+					fd, (off_t)uio_res->maps[i].offset,
+					(size_t)uio_res->maps[i].size, 0);
+			/* fd is not needed in slave process, close it */
+			close(fd);
+			if (mapaddr != uio_res->maps[i].addr) {
+				RTE_LOG(ERR, EAL,
+					"Cannot mmap device resource file %s to address: %p\n",
+					uio_res->maps[i].path,
+					uio_res->maps[i].addr);
+				return -1;
+			}
+		}
+		return 0;
+	}
+
+	RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
+	return 1;
+}
+
+/* map the PCI resource of a PCI device in virtual memory */
+int
+pci_uio_map_resource(struct rte_pci_device *dev)
+{
+	int i, map_idx = 0, ret;
+	uint64_t phaddr;
+	struct mapped_pci_resource *uio_res = NULL;
+	struct mapped_pci_res_list *uio_res_list =
+		RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.uio_cfg_fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* secondary processes - use already recorded details */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return pci_uio_map_secondary(dev);
+
+	/* allocate uio resource */
+	ret = pci_uio_alloc_resource(dev, &uio_res);
+	if (ret)
+		return ret;
+
+	/* Map all BARs */
+	for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+		/* skip empty BAR */
+		phaddr = dev->mem_resource[i].phys_addr;
+		if (phaddr == 0)
+			continue;
+
+		ret = pci_uio_map_resource_by_index(dev, i,
+				uio_res, map_idx);
+		if (ret)
+			goto error;
+
+		map_idx++;
+	}
+
+	uio_res->nb_maps = map_idx;
+
+	TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+	return 0;
+error:
+	for (i = 0; i < map_idx; i++) {
+		pci_unmap_resource(uio_res->maps[i].addr,
+				(size_t)uio_res->maps[i].size);
+		rte_free(uio_res->maps[i].path);
+	}
+	pci_uio_free_resource(dev, uio_res);
+	return -1;
+}
+
+static void
+pci_uio_unmap(struct mapped_pci_resource *uio_res)
+{
+	int i;
+
+	if (uio_res == NULL)
+		return;
+
+	for (i = 0; i != uio_res->nb_maps; i++) {
+		pci_unmap_resource(uio_res->maps[i].addr,
+				(size_t)uio_res->maps[i].size);
+		rte_free(uio_res->maps[i].path);
+	}
+}
+
+static struct mapped_pci_resource *
+pci_uio_find_resource(struct rte_pci_device *dev)
+{
+	struct mapped_pci_resource *uio_res;
+	struct mapped_pci_res_list *uio_res_list =
+			RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+	if (dev == NULL)
+		return NULL;
+
+	TAILQ_FOREACH(uio_res, uio_res_list, next) {
+
+		/* skip this element if it doesn't match our PCI address */
+		if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
+			return uio_res;
+	}
+	return NULL;
+}
+
+/* unmap the PCI resource of a PCI device in virtual memory */
+void
+pci_uio_unmap_resource(struct rte_pci_device *dev)
+{
+	struct mapped_pci_resource *uio_res;
+	struct mapped_pci_res_list *uio_res_list =
+			RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
+
+	if (dev == NULL)
+		return;
+
+	/* find an entry for the device */
+	uio_res = pci_uio_find_resource(dev);
+	if (uio_res == NULL)
+		return;
+
+	/* secondary processes - just free maps */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return pci_uio_unmap(uio_res);
+
+	TAILQ_REMOVE(uio_res_list, uio_res, next);
+
+	/* unmap all resources */
+	pci_uio_unmap(uio_res);
+
+	/* free uio resource */
+	rte_free(uio_res);
+
+	/* close fd if in primary process */
+	close(dev->intr_handle.fd);
+	if (dev->intr_handle.uio_cfg_fd >= 0) {
+		close(dev->intr_handle.uio_cfg_fd);
+		dev->intr_handle.uio_cfg_fd = -1;
+	}
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+}
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
new file mode 100644
index 00000000..12e0fcac
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -0,0 +1,61 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <rte_eal.h>
+
+#include "eal_filesystem.h"
+#include "eal_internal_cfg.h"
+
+int
+rte_eal_primary_proc_alive(const char *config_file_path)
+{
+	int config_fd;
+
+	if (config_file_path)
+		config_fd = open(config_file_path, O_RDONLY);
+	else {
+		char default_path[PATH_MAX+1];
+		snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
+			 default_config_dir, "rte");
+		config_fd = open(default_path, O_RDONLY);
+	}
+	if (config_fd < 0)
+		return 0;
+
+	int ret = lockf(config_fd, F_TEST, 0);
+	close(config_fd);
+
+	return !!ret;
+}
diff --git a/lib/librte_eal/common/eal_common_string_fns.c b/lib/librte_eal/common/eal_common_string_fns.c
new file mode 100644
index 00000000..125a3e2d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_string_fns.c
@@ -0,0 +1,69 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+
+/* split string into tokens */
+int
+rte_strsplit(char *string, int stringlen,
+	     char **tokens, int maxtokens, char delim)
+{
+	int i, tok = 0;
+	int tokstart = 1; /* first token is right at start of string */
+
+	if (string == NULL || tokens == NULL)
+		goto einval_error;
+
+	for (i = 0; i < stringlen; i++) {
+		if (string[i] == '\0' || tok >= maxtokens)
+			break;
+		if (tokstart) {
+			tokstart = 0;
+			tokens[tok++] = &string[i];
+		}
+		if (string[i] == delim) {
+			string[i] = '\0';
+			tokstart = 1;
+		}
+	}
+	return tok;
+
+einval_error:
+	errno = EINVAL;
+	return -1;
+}
diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c
new file mode 100644
index 00000000..bb08ec8b
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_tailqs.c
@@ -0,0 +1,202 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+
+TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem);
+/* local tailq list */
+static struct rte_tailq_elem_head rte_tailq_elem_head =
+	TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head);
+
+/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */
+static int rte_tailqs_count = -1;
+
+struct rte_tailq_head *
+rte_eal_tailq_lookup(const char *name)
+{
+	unsigned i;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (name == NULL)
+		return NULL;
+
+	for (i = 0; i < RTE_MAX_TAILQ; i++) {
+		if (!strncmp(name, mcfg->tailq_head[i].name,
+			     RTE_TAILQ_NAMESIZE-1))
+			return &mcfg->tailq_head[i];
+	}
+
+	return NULL;
+}
+
+void
+rte_dump_tailq(FILE *f)
+{
+	struct rte_mem_config *mcfg;
+	unsigned i = 0;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_read_lock(&mcfg->qlock);
+	for (i = 0; i < RTE_MAX_TAILQ; i++) {
+		const struct rte_tailq_head *tailq = &mcfg->tailq_head[i];
+		const struct rte_tailq_entry_head *head = &tailq->tailq_head;
+
+		fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n",
+			i, tailq->name, head->tqh_first, head->tqh_last);
+	}
+	rte_rwlock_read_unlock(&mcfg->qlock);
+}
+
+static struct rte_tailq_head *
+rte_eal_tailq_create(const char *name)
+{
+	struct rte_tailq_head *head = NULL;
+
+	if (!rte_eal_tailq_lookup(name) &&
+	    (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) {
+		struct rte_mem_config *mcfg;
+
+		mcfg = rte_eal_get_configuration()->mem_config;
+		head = &mcfg->tailq_head[rte_tailqs_count];
+		snprintf(head->name, sizeof(head->name) - 1, "%s", name);
+		TAILQ_INIT(&head->tailq_head);
+		rte_tailqs_count++;
+	}
+
+	return head;
+}
+
+/* local register, used to store "early" tailqs before rte_eal_init() and to
+ * ensure secondary process only registers tailqs once. */
+static int
+rte_eal_tailq_local_register(struct rte_tailq_elem *t)
+{
+	struct rte_tailq_elem *temp;
+
+	TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) {
+		if (!strncmp(t->name, temp->name, sizeof(temp->name)))
+			return -1;
+	}
+
+	TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next);
+	return 0;
+}
+
+static void
+rte_eal_tailq_update(struct rte_tailq_elem *t)
+{
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		/* primary process is the only one that creates */
+		t->head = rte_eal_tailq_create(t->name);
+	} else {
+		t->head = rte_eal_tailq_lookup(t->name);
+	}
+}
+
+int
+rte_eal_tailq_register(struct rte_tailq_elem *t)
+{
+	if (rte_eal_tailq_local_register(t) < 0) {
+		RTE_LOG(ERR, EAL,
+			"%s tailq is already registered\n", t->name);
+		goto error;
+	}
+
+	/* if a register happens after rte_eal_tailqs_init(), then we can update
+	 * tailq head */
+	if (rte_tailqs_count >= 0) {
+		rte_eal_tailq_update(t);
+		if (t->head == NULL) {
+			RTE_LOG(ERR, EAL,
+				"Cannot initialize tailq: %s\n", t->name);
+			TAILQ_REMOVE(&rte_tailq_elem_head, t, next);
+			goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	t->head = NULL;
+	return -1;
+}
+
+int
+rte_eal_tailqs_init(void)
+{
+	struct rte_tailq_elem *t;
+
+	rte_tailqs_count = 0;
+
+	TAILQ_FOREACH(t, &rte_tailq_elem_head, next) {
+		/* second part of register job for "early" tailqs, see
+		 * rte_eal_tailq_register and EAL_REGISTER_TAILQ */
+		rte_eal_tailq_update(t);
+		if (t->head == NULL) {
+			RTE_LOG(ERR, EAL,
+				"Cannot initialize tailq: %s\n", t->name);
+			/* no need to TAILQ_REMOVE, we are going to panic in
+			 * rte_eal_init() */
+			goto fail;
+		}
+	}
+
+	return 0;
+
+fail:
+	rte_dump_tailq(stderr);
+	return -1;
+}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
new file mode 100644
index 00000000..2405e93f
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -0,0 +1,157 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#include <assert.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+
+#include "eal_thread.h"
+
+RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+
+unsigned rte_socket_id(void)
+{
+	return RTE_PER_LCORE(_socket_id);
+}
+
+int eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+{
+	unsigned cpu = 0;
+	int socket_id = SOCKET_ID_ANY;
+	int sid;
+
+	if (cpusetp == NULL)
+		return SOCKET_ID_ANY;
+
+	do {
+		if (!CPU_ISSET(cpu, cpusetp))
+			continue;
+
+		if (socket_id == SOCKET_ID_ANY)
+			socket_id = eal_cpu_socket_id(cpu);
+
+		sid = eal_cpu_socket_id(cpu);
+		if (socket_id != sid) {
+			socket_id = SOCKET_ID_ANY;
+			break;
+		}
+
+	} while (++cpu < RTE_MAX_LCORE);
+
+	return socket_id;
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+	int s;
+	unsigned lcore_id;
+	pthread_t tid;
+
+	tid = pthread_self();
+
+	s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
+	if (s != 0) {
+		RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+		return -1;
+	}
+
+	/* store socket_id in TLS for quick access */
+	RTE_PER_LCORE(_socket_id) =
+		eal_cpuset_socket_id(cpusetp);
+
+	/* store cpuset in TLS for quick access */
+	memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
+		sizeof(rte_cpuset_t));
+
+	lcore_id = rte_lcore_id();
+	if (lcore_id != (unsigned)LCORE_ID_ANY) {
+		/* EAL thread will update lcore_config */
+		lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
+		memmove(&lcore_config[lcore_id].cpuset, cpusetp,
+			sizeof(rte_cpuset_t));
+	}
+
+	return 0;
+}
+
+void
+rte_thread_get_affinity(rte_cpuset_t *cpusetp)
+{
+	assert(cpusetp);
+	memmove(cpusetp, &RTE_PER_LCORE(_cpuset),
+		sizeof(rte_cpuset_t));
+}
+
+int
+eal_thread_dump_affinity(char *str, unsigned size)
+{
+	rte_cpuset_t cpuset;
+	unsigned cpu;
+	int ret;
+	unsigned int out = 0;
+
+	rte_thread_get_affinity(&cpuset);
+
+	for (cpu = 0; cpu < RTE_MAX_LCORE; cpu++) {
+		if (!CPU_ISSET(cpu, &cpuset))
+			continue;
+
+		ret = snprintf(str + out,
+			       size - out, "%u,", cpu);
+		if (ret < 0 || (unsigned)ret >= size - out) {
+			/* string will be truncated */
+			ret = -1;
+			goto exit;
+		}
+
+		out += ret;
+	}
+
+	ret = 0;
+exit:
+	/* remove the last separator */
+	if (out > 0)
+		str[out - 1] = '\0';
+
+	return ret;
+}
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
new file mode 100644
index 00000000..c4227cd8
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -0,0 +1,86 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+
+#include "eal_private.h"
+
+/* The frequency of the RDTSC timer resolution */
+static uint64_t eal_tsc_resolution_hz;
+
+void
+rte_delay_us(unsigned us)
+{
+	const uint64_t start = rte_get_timer_cycles();
+	const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
+	while ((rte_get_timer_cycles() - start) < ticks)
+		rte_pause();
+}
+
+uint64_t
+rte_get_tsc_hz(void)
+{
+	return eal_tsc_resolution_hz;
+}
+
+static uint64_t
+estimate_tsc_freq(void)
+{
+	RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly"
+		" - clock timings may be less accurate.\n");
+	/* assume that the sleep(1) will sleep for 1 second */
+	uint64_t start = rte_rdtsc();
+	sleep(1);
+	return rte_rdtsc() - start;
+}
+
+void
+set_tsc_freq(void)
+{
+	uint64_t freq = get_tsc_freq();
+
+	if (!freq)
+		freq = estimate_tsc_freq();
+
+	RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
+	eal_tsc_resolution_hz = freq;
+}
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
new file mode 100644
index 00000000..fdb4a70b
--- /dev/null
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -0,0 +1,118 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Stores functions and path defines for files and directories
+ * on the filesystem for Linux, that are used by the Linux EAL.
+ */
+
+#ifndef EAL_FILESYSTEM_H
+#define EAL_FILESYSTEM_H
+
+/** Path of rte config file. */
+#define RUNTIME_CONFIG_FMT "%s/.%s_config"
+
+#include <stdint.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <rte_string_fns.h>
+#include "eal_internal_cfg.h"
+
+static const char *default_config_dir = "/var/run";
+
+static inline const char *
+eal_runtime_config_path(void)
+{
+	static char buffer[PATH_MAX]; /* static so auto-zeroed */
+	const char *directory = default_config_dir;
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		directory = home_dir;
+	snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
+			internal_config.hugefile_prefix);
+	return buffer;
+}
+
+/** Path of hugepage info file. */
+#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
+
+static inline const char *
+eal_hugepage_info_path(void)
+{
+	static char buffer[PATH_MAX]; /* static so auto-zeroed */
+	const char *directory = default_config_dir;
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		directory = home_dir;
+	snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory,
+			internal_config.hugefile_prefix);
+	return buffer;
+}
+
+/** String format for hugepage map files. */
+#define HUGEFILE_FMT "%s/%smap_%d"
+#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
+
+static inline const char *
+eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+	snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
+			internal_config.hugefile_prefix, f_id);
+	buffer[buflen - 1] = '\0';
+	return buffer;
+}
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+static inline const char *
+eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+	snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
+			internal_config.hugefile_prefix, f_id);
+	buffer[buflen - 1] = '\0';
+	return buffer;
+}
+#endif
+
+/** define the default filename prefix for the %s values above */
+#define HUGEFILE_PREFIX_DEFAULT "rte"
+
+/** Function to read a single numeric value from a file on the filesystem.
+ * Used to read information from files on /sys */
+int eal_parse_sysfs_value(const char *filename, unsigned long *val);
+
+#endif /* EAL_FILESYSTEM_H */
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
new file mode 100644
index 00000000..38edac03
--- /dev/null
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -0,0 +1,67 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_HUGEPAGES_H
+#define EAL_HUGEPAGES_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define MAX_HUGEPAGE_PATH PATH_MAX
+
+/**
+ * Structure used to store informations about hugepages that we mapped
+ * through the files in hugetlbfs.
+ */
+struct hugepage_file {
+	void *orig_va;      /**< virtual addr of first mmap() */
+	void *final_va;     /**< virtual addr of 2nd mmap() */
+	uint64_t physaddr;  /**< physical addr */
+	size_t size;        /**< the page size */
+	int socket_id;      /**< NUMA socket ID */
+	int file_id;        /**< the '%d' in HUGEFILE_FMT */
+	int memseg_id;      /**< the memory segment to which page belongs */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	int repeated;		/**< number of times the page size is repeated */
+#endif
+	char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
+};
+
+/**
+ * Read the information from linux on what hugepages are available
+ * for the EAL to use
+ */
+int eal_hugepage_info_init(void);
+
+#endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
new file mode 100644
index 00000000..5f1367eb
--- /dev/null
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -0,0 +1,94 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Holds the structures for the eal internal configuration
+ */
+
+#ifndef EAL_INTERNAL_CFG_H
+#define EAL_INTERNAL_CFG_H
+
+#include <rte_eal.h>
+#include <rte_pci_dev_feature_defs.h>
+
+#define MAX_HUGEPAGE_SIZES 3  /**< support up to 3 page sizes */
+
+/*
+ * internal configuration structure for the number, size and
+ * mount points of hugepages
+ */
+struct hugepage_info {
+	uint64_t hugepage_sz;   /**< size of a huge page */
+	const char *hugedir;    /**< dir where hugetlbfs is mounted */
+	uint32_t num_pages[RTE_MAX_NUMA_NODES];
+				/**< number of hugepages of that size on each socket */
+	int lock_descriptor;    /**< file descriptor for hugepage dir */
+};
+
+/**
+ * internal configuration
+ */
+struct internal_config {
+	volatile size_t memory;           /**< amount of asked memory */
+	volatile unsigned force_nchannel; /**< force number of channels */
+	volatile unsigned force_nrank;    /**< force number of ranks */
+	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
+	unsigned hugepage_unlink;         /**< true to unlink backing files */
+	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+	volatile unsigned no_pci;         /**< true to disable PCI */
+	volatile unsigned no_hpet;        /**< true to disable HPET */
+	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
+										* instead of native TSC */
+	volatile unsigned no_shconf;      /**< true if there is no shared config */
+	volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
+	volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
+	/** true to try allocating memory on specific sockets */
+	volatile unsigned force_sockets;
+	volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
+	volatile int syslog_facility;	  /**< facility passed to openlog() */
+	volatile uint32_t log_level;	  /**< default log level */
+	/** default interrupt mode for VFIO */
+	volatile enum rte_intr_mode vfio_intr_mode;
+	const char *hugefile_prefix;      /**< the base filename of hugetlbfs files */
+	const char *hugepage_dir;         /**< specific hugetlbfs directory to use */
+
+	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
+	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+};
+extern struct internal_config internal_config; /**< Global EAL configuration. */
+
+void eal_reset_internal_config(struct internal_config *internal_cfg);
+
+#endif /* EAL_INTERNAL_CFG_H */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
new file mode 100644
index 00000000..a881c62e
--- /dev/null
+++ b/lib/librte_eal/common/eal_options.h
@@ -0,0 +1,100 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2014 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_OPTIONS_H
+#define EAL_OPTIONS_H
+
+enum {
+	/* long options mapped to a short option */
+#define OPT_HELP              "help"
+	OPT_HELP_NUM            = 'h',
+#define OPT_PCI_BLACKLIST     "pci-blacklist"
+	OPT_PCI_BLACKLIST_NUM   = 'b',
+#define OPT_PCI_WHITELIST     "pci-whitelist"
+	OPT_PCI_WHITELIST_NUM   = 'w',
+
+	/* first long only option value must be >= 256, so that we won't
+	 * conflict with short options */
+	OPT_LONG_MIN_NUM = 256,
+#define OPT_BASE_VIRTADDR     "base-virtaddr"
+	OPT_BASE_VIRTADDR_NUM,
+#define OPT_CREATE_UIO_DEV    "create-uio-dev"
+	OPT_CREATE_UIO_DEV_NUM,
+#define OPT_FILE_PREFIX       "file-prefix"
+	OPT_FILE_PREFIX_NUM,
+#define OPT_HUGE_DIR          "huge-dir"
+	OPT_HUGE_DIR_NUM,
+#define OPT_HUGE_UNLINK       "huge-unlink"
+	OPT_HUGE_UNLINK_NUM,
+#define OPT_LCORES            "lcores"
+	OPT_LCORES_NUM,
+#define OPT_LOG_LEVEL         "log-level"
+	OPT_LOG_LEVEL_NUM,
+#define OPT_MASTER_LCORE      "master-lcore"
+	OPT_MASTER_LCORE_NUM,
+#define OPT_PROC_TYPE         "proc-type"
+	OPT_PROC_TYPE_NUM,
+#define OPT_NO_HPET           "no-hpet"
+	OPT_NO_HPET_NUM,
+#define OPT_NO_HUGE           "no-huge"
+	OPT_NO_HUGE_NUM,
+#define OPT_NO_PCI            "no-pci"
+	OPT_NO_PCI_NUM,
+#define OPT_NO_SHCONF         "no-shconf"
+	OPT_NO_SHCONF_NUM,
+#define OPT_SOCKET_MEM        "socket-mem"
+	OPT_SOCKET_MEM_NUM,
+#define OPT_SYSLOG            "syslog"
+	OPT_SYSLOG_NUM,
+#define OPT_VDEV              "vdev"
+	OPT_VDEV_NUM,
+#define OPT_VFIO_INTR         "vfio-intr"
+	OPT_VFIO_INTR_NUM,
+#define OPT_VMWARE_TSC_MAP    "vmware-tsc-map"
+	OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_XEN_DOM0          "xen-dom0"
+	OPT_XEN_DOM0_NUM,
+	OPT_LONG_MAX_NUM
+};
+
+extern const char eal_short_options[];
+extern const struct option eal_long_options[];
+
+int eal_parse_common_option(int opt, const char *argv,
+			    struct internal_config *conf);
+int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_check_common_options(struct internal_config *internal_cfg);
+void eal_common_usage(void);
+enum rte_proc_type_t eal_proc_type_detect(void);
+int eal_plugins_init(void);
+
+#endif /* EAL_OPTIONS_H */
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
new file mode 100644
index 00000000..2342fa16
--- /dev/null
+++ b/lib/librte_eal/common/eal_private.h
@@ -0,0 +1,331 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EAL_PRIVATE_H_
+#define _EAL_PRIVATE_H_
+
+#include <stdio.h>
+#include <rte_pci.h>
+
+/**
+ * Initialize the memzone subsystem (private to eal).
+ *
+ * @return
+ *   - 0 on success
+ *   - Negative on error
+ */
+int rte_eal_memzone_init(void);
+
+/**
+ * Common log initialization function (private to eal).
+ *
+ * Called by environment-specific log initialization function to initialize
+ * log history.
+ *
+ * @param default_log
+ *   The default log stream to be used.
+ * @return
+ *   - 0 on success
+ *   - Negative on error
+ */
+int rte_eal_common_log_init(FILE *default_log);
+
+/**
+ * Fill configuration with number of physical and logical processors
+ *
+ * This function is private to EAL.
+ *
+ * Parse /proc/cpuinfo to get the number of physical and logical
+ * processors on the machine.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_cpu_init(void);
+
+/**
+ * Map memory
+ *
+ * This function is private to EAL.
+ *
+ * Fill configuration structure with these infos, and return 0 on success.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_memory_init(void);
+
+/**
+ * Configure timers
+ *
+ * This function is private to EAL.
+ *
+ * Mmap memory areas used by HPET (high precision event timer) that will
+ * provide our time reference, and configure the TSC frequency also for it
+ * to be used as a reference.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_timer_init(void);
+
+/**
+ * Init early logs
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_log_early_init(void);
+
+/**
+ * Init the default log stream
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_log_init(const char *id, int facility);
+
+/**
+ * Init the default log stream
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_pci_init(void);
+
+#ifdef RTE_LIBRTE_IVSHMEM
+/**
+ * Init the memory from IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_ivshmem_init(void);
+
+/**
+ * Init objects in IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_ivshmem_obj_init(void);
+#endif
+
+struct rte_pci_driver;
+struct rte_pci_device;
+
+/**
+ * Unbind kernel driver for this device
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int pci_unbind_kernel_driver(struct rte_pci_device *dev);
+
+/**
+ * Map the PCI resource of a PCI device in virtual memory
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int pci_uio_map_resource(struct rte_pci_device *dev);
+
+/**
+ * Unmap the PCI resource of a PCI device
+ *
+ * This function is private to EAL.
+ */
+void pci_uio_unmap_resource(struct rte_pci_device *dev);
+
+/**
+ * Allocate uio resource for PCI device
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ *   PCI device to allocate uio resource
+ * @param uio_res
+ *   Pointer to uio resource.
+ *   If the function returns 0, the pointer will be filled.
+ * @return
+ *   0 on success, negative on error
+ */
+int pci_uio_alloc_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource **uio_res);
+
+/**
+ * Free uio resource for PCI device
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ *   PCI device to free uio resource
+ * @param uio_res
+ *   Pointer to uio resource.
+ */
+void pci_uio_free_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource *uio_res);
+
+/**
+ * Map device memory to uio resource
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ *   PCI device that has memory information.
+ * @param res_idx
+ *   Memory resource index of the PCI device.
+ * @param uio_res
+ *  uio resource that will keep mapping information.
+ * @param map_idx
+ *   Mapping information index of the uio resource.
+ * @return
+ *   0 on success, negative on error
+ */
+int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+		struct mapped_pci_resource *uio_res, int map_idx);
+
+/**
+ * Init tail queues for non-EAL library structures. This is to allow
+ * the rings, mempools, etc. lists to be shared among multiple processes
+ *
+ * This function is private to EAL
+ *
+ * @return
+ *    0 on success, negative on error
+ */
+int rte_eal_tailqs_init(void);
+
+/**
+ * Init interrupt handling.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_intr_init(void);
+
+/**
+ * Init alarm mechanism. This is to allow a callback be called after
+ * specific time.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_alarm_init(void);
+
+/**
+ * This function initialises any virtual devices
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_dev_init(void);
+
+/**
+ * Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
+ * etc.) loaded.
+ *
+ * @param module_name
+ *	The module's name which need to be checked
+ *
+ * @return
+ *	-1 means some error happens(NULL pointer or open failure)
+ *	0  means the module not loaded
+ *	1  means the module loaded
+ */
+int rte_eal_check_module(const char *module_name);
+
+/**
+ * Get cpu core_id.
+ *
+ * This function is private to the EAL.
+ */
+unsigned eal_cpu_core_id(unsigned lcore_id);
+
+/**
+ * Check if cpu is present.
+ *
+ * This function is private to the EAL.
+ */
+int eal_cpu_detected(unsigned lcore_id);
+
+/**
+ * Set TSC frequency from precise value or estimation
+ *
+ * This function is private to the EAL.
+ */
+void set_tsc_freq(void);
+
+/**
+ * Get precise TSC frequency from system
+ *
+ * This function is private to the EAL.
+ */
+uint64_t get_tsc_freq(void);
+
+/**
+ * Prepare physical memory mapping
+ * i.e. hugepages on Linux and
+ *      contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_init(void);
+
+/**
+ * Creates memory mapping in secondary process
+ * i.e. hugepages on Linux and
+ *      contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_attach(void);
+
+#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h
new file mode 100644
index 00000000..e4e76b9d
--- /dev/null
+++ b/lib/librte_eal/common/eal_thread.h
@@ -0,0 +1,100 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_THREAD_H
+#define EAL_THREAD_H
+
+#include <rte_lcore.h>
+
+/**
+ * basic loop of thread, called for each thread by eal_init().
+ *
+ * @param arg
+ *   opaque pointer
+ */
+__attribute__((noreturn)) void *eal_thread_loop(void *arg);
+
+/**
+ * Init per-lcore info for master thread
+ *
+ * @param lcore_id
+ *   identifier of master lcore
+ */
+void eal_thread_init_master(unsigned lcore_id);
+
+/**
+ * Get the NUMA socket id from cpu id.
+ * This function is private to EAL.
+ *
+ * @param cpu_id
+ *   The logical process id.
+ * @return
+ *   socket_id or SOCKET_ID_ANY
+ */
+unsigned eal_cpu_socket_id(unsigned cpu_id);
+
+/**
+ * Get the NUMA socket id from cpuset.
+ * This function is private to EAL.
+ *
+ * @param cpusetp
+ *   The point to a valid cpu set.
+ * @return
+ *   socket_id or SOCKET_ID_ANY
+ */
+int eal_cpuset_socket_id(rte_cpuset_t *cpusetp);
+
+/**
+ * Default buffer size to use with eal_thread_dump_affinity()
+ */
+#define RTE_CPU_AFFINITY_STR_LEN            256
+
+/**
+ * Dump the current pthread cpuset.
+ * This function is private to EAL.
+ *
+ * Note:
+ *   If the dump size is greater than the size of given buffer,
+ *   the string will be truncated and with '\0' at the end.
+ *
+ * @param str
+ *   The string buffer the cpuset will dump to.
+ * @param size
+ *   The string buffer size.
+ * @return
+ *   0 for success, -1 if truncation happens.
+ */
+int
+eal_thread_dump_affinity(char *str, unsigned size);
+
+#endif /* EAL_THREAD_H */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
new file mode 100644
index 00000000..454a12b0
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
@@ -0,0 +1,48 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_ARM_H_
+#define _RTE_ATOMIC_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_atomic_64.h>
+#else
+#include <rte_atomic_32.h>
+#endif
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
+#endif /* _RTE_ATOMIC_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
new file mode 100644
index 00000000..9ae1e78b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
@@ -0,0 +1,74 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_ARM32_H_
+#define _RTE_ATOMIC_ARM32_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+#define	rte_mb()  __sync_synchronize()
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+#define	rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+#define	rte_rmb() __sync_synchronize()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
new file mode 100644
index 00000000..671caa76
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
@@ -0,0 +1,88 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_ATOMIC_ARM64_H_
+#define _RTE_ATOMIC_ARM64_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+#define dmb(opt)  do { asm volatile("dmb " #opt : : : "memory"); } while (0)
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_mb(void)
+{
+	dmb(ish);
+}
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_wmb(void)
+{
+	dmb(ishst);
+}
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_rmb(void)
+{
+	dmb(ishld);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
new file mode 100644
index 00000000..3f2dd1f2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -0,0 +1,107 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_ARM_H_
+#define _RTE_BYTEORDER_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+/* fix missing __builtin_bswap16 for gcc older then 4.8 */
+#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+	register uint16_t x = _x;
+
+	asm volatile ("rev16 %0,%1"
+		      : "=r" (x)
+		      : "r" (x)
+		      );
+	return x;
+}
+
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+				   rte_constant_bswap16(x) : \
+				   rte_arch_bswap16(x)))
+#endif
+
+/* ARM architecture is bi-endian (both big and little). */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
new file mode 100644
index 00000000..b8f62889
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
@@ -0,0 +1,42 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM_H_
+#define _RTE_CPUFLAGS_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_cpuflags_64.h>
+#else
+#include <rte_cpuflags_32.h>
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
new file mode 100644
index 00000000..eb02d9b9
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
@@ -0,0 +1,82 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM32_H_
+#define _RTE_CPUFLAGS_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+	RTE_CPUFLAG_SWP = 0,
+	RTE_CPUFLAG_HALF,
+	RTE_CPUFLAG_THUMB,
+	RTE_CPUFLAG_A26BIT,
+	RTE_CPUFLAG_FAST_MULT,
+	RTE_CPUFLAG_FPA,
+	RTE_CPUFLAG_VFP,
+	RTE_CPUFLAG_EDSP,
+	RTE_CPUFLAG_JAVA,
+	RTE_CPUFLAG_IWMMXT,
+	RTE_CPUFLAG_CRUNCH,
+	RTE_CPUFLAG_THUMBEE,
+	RTE_CPUFLAG_NEON,
+	RTE_CPUFLAG_VFPv3,
+	RTE_CPUFLAG_VFPv3D16,
+	RTE_CPUFLAG_TLS,
+	RTE_CPUFLAG_VFPv4,
+	RTE_CPUFLAG_IDIVA,
+	RTE_CPUFLAG_IDIVT,
+	RTE_CPUFLAG_VFPD32,
+	RTE_CPUFLAG_LPAE,
+	RTE_CPUFLAG_EVTSTRM,
+	RTE_CPUFLAG_AES,
+	RTE_CPUFLAG_PMULL,
+	RTE_CPUFLAG_SHA1,
+	RTE_CPUFLAG_SHA2,
+	RTE_CPUFLAG_CRC32,
+	RTE_CPUFLAG_V7L,
+	/* The last item */
+	RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
new file mode 100644
index 00000000..49aead92
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
@@ -0,0 +1,64 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM64_H_
+#define _RTE_CPUFLAGS_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+	RTE_CPUFLAG_FP = 0,
+	RTE_CPUFLAG_NEON,
+	RTE_CPUFLAG_EVTSTRM,
+	RTE_CPUFLAG_AES,
+	RTE_CPUFLAG_PMULL,
+	RTE_CPUFLAG_SHA1,
+	RTE_CPUFLAG_SHA2,
+	RTE_CPUFLAG_CRC32,
+	RTE_CPUFLAG_ATOMICS,
+	RTE_CPUFLAG_AARCH64,
+	/* The last item */
+	RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
new file mode 100644
index 00000000..a8009a06
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
@@ -0,0 +1,42 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_ARM_H_
+#define _RTE_CYCLES_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_cycles_64.h>
+#else
+#include <rte_cycles_32.h>
+#endif
+
+#endif /* _RTE_CYCLES_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
new file mode 100644
index 00000000..9c1be71e
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
@@ -0,0 +1,121 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_ARM32_H_
+#define _RTE_CYCLES_ARM32_H_
+
+/* ARM v7 does not have suitable source of clock signals. The only clock counter
+   available in the core is 32 bit wide. Therefore it is unsuitable as the
+   counter overlaps every few seconds and probably is not accessible by
+   userspace programs. Therefore we use clock_gettime(CLOCK_MONOTONIC_RAW) to
+   simulate counter running at 1GHz.
+*/
+
+#include <time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
+
+/**
+ * This call is easily portable to any ARM architecture, however,
+ * it may be damn slow and inprecise for some tasks.
+ */
+static inline uint64_t
+__rte_rdtsc_syscall(void)
+{
+	struct timespec val;
+	uint64_t v;
+
+	while (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0)
+		/* no body */;
+
+	v  = (uint64_t) val.tv_sec * 1000000000LL;
+	v += (uint64_t) val.tv_nsec;
+	return v;
+}
+#define rte_rdtsc __rte_rdtsc_syscall
+
+#else
+
+/**
+ * This function requires to configure the PMCCNTR and enable
+ * userspace access to it:
+ *
+ *      asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r"(1));
+ *      asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29));
+ *      asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f));
+ *
+ * which is possible only from the priviledged mode (kernel space).
+ */
+static inline uint64_t
+__rte_rdtsc_pmccntr(void)
+{
+	unsigned tsc;
+	uint64_t final_tsc;
+
+	/* Read PMCCNTR */
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(tsc));
+	/* 1 tick = 64 clocks */
+	final_tsc = ((uint64_t)tsc) << 6;
+
+	return (uint64_t)final_tsc;
+}
+#define rte_rdtsc __rte_rdtsc_pmccntr
+
+#endif /* RTE_ARM_EAL_RDTSC_USE_PMU */
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
new file mode 100644
index 00000000..14f26120
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
@@ -0,0 +1,71 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_ARM64_H_
+#define _RTE_CYCLES_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+	uint64_t tsc;
+
+	asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
+	return tsc;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
new file mode 100644
index 00000000..1d562c3f
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
@@ -0,0 +1,42 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARM_H_
+#define _RTE_MEMCPY_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_memcpy_64.h>
+#else
+#include <rte_memcpy_32.h>
+#endif
+
+#endif /* _RTE_MEMCPY_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
new file mode 100644
index 00000000..988125b3
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -0,0 +1,338 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_ARM32_H_
+#define _RTE_MEMCPY_ARM32_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+#ifdef RTE_ARCH_ARM_NEON_MEMCPY
+
+#ifndef RTE_MACHINE_CPUFLAG_NEON
+#error "Cannot optimize memcpy by NEON as the CPU seems to not support this"
+#endif
+
+/* ARM NEON Intrinsics are used to copy data */
+#include <arm_neon.h>
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	vst1q_u8(dst, vld1q_u8(src));
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	asm volatile (
+		"vld1.8 {d0-d3}, [%0]\n\t"
+		"vst1.8 {d0-d3}, [%1]\n\t"
+		: "+r" (src), "+r" (dst)
+		: : "memory", "d0", "d1", "d2", "d3");
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	asm volatile (
+		"vld1.8 {d0-d3}, [%0]!\n\t"
+		"vld1.8 {d4-d5}, [%0]\n\t"
+		"vst1.8 {d0-d3}, [%1]!\n\t"
+		"vst1.8 {d4-d5}, [%1]\n\t"
+		: "+r" (src), "+r" (dst)
+		:
+		: "memory", "d0", "d1", "d2", "d3", "d4", "d5");
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	asm volatile (
+		"vld1.8 {d0-d3}, [%0]!\n\t"
+		"vld1.8 {d4-d7}, [%0]\n\t"
+		"vst1.8 {d0-d3}, [%1]!\n\t"
+		"vst1.8 {d4-d7}, [%1]\n\t"
+		: "+r" (src), "+r" (dst)
+		:
+		: "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7");
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	asm volatile ("pld [%0, #64]" : : "r" (src));
+	asm volatile (
+		"vld1.8 {d0-d3},   [%0]!\n\t"
+		"vld1.8 {d4-d7},   [%0]!\n\t"
+		"vld1.8 {d8-d11},  [%0]!\n\t"
+		"vld1.8 {d12-d15}, [%0]\n\t"
+		"vst1.8 {d0-d3},   [%1]!\n\t"
+		"vst1.8 {d4-d7},   [%1]!\n\t"
+		"vst1.8 {d8-d11},  [%1]!\n\t"
+		"vst1.8 {d12-d15}, [%1]\n\t"
+		: "+r" (src), "+r" (dst)
+		:
+		: "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+		"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15");
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	asm volatile ("pld [%0,  #64]" : : "r" (src));
+	asm volatile ("pld [%0, #128]" : : "r" (src));
+	asm volatile ("pld [%0, #192]" : : "r" (src));
+	asm volatile ("pld [%0, #256]" : : "r" (src));
+	asm volatile ("pld [%0, #320]" : : "r" (src));
+	asm volatile ("pld [%0, #384]" : : "r" (src));
+	asm volatile ("pld [%0, #448]" : : "r" (src));
+	asm volatile (
+		"vld1.8 {d0-d3},   [%0]!\n\t"
+		"vld1.8 {d4-d7},   [%0]!\n\t"
+		"vld1.8 {d8-d11},  [%0]!\n\t"
+		"vld1.8 {d12-d15}, [%0]!\n\t"
+		"vld1.8 {d16-d19}, [%0]!\n\t"
+		"vld1.8 {d20-d23}, [%0]!\n\t"
+		"vld1.8 {d24-d27}, [%0]!\n\t"
+		"vld1.8 {d28-d31}, [%0]\n\t"
+		"vst1.8 {d0-d3},   [%1]!\n\t"
+		"vst1.8 {d4-d7},   [%1]!\n\t"
+		"vst1.8 {d8-d11},  [%1]!\n\t"
+		"vst1.8 {d12-d15}, [%1]!\n\t"
+		"vst1.8 {d16-d19}, [%1]!\n\t"
+		"vst1.8 {d20-d23}, [%1]!\n\t"
+		"vst1.8 {d24-d27}, [%1]!\n\t"
+		"vst1.8 {d28-d31}, [%1]!\n\t"
+		: "+r" (src), "+r" (dst)
+		:
+		: "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+		"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+		"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+		"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");
+}
+
+#define rte_memcpy(dst, src, n)              \
+	({ (__builtin_constant_p(n)) ?       \
+	memcpy((dst), (src), (n)) :          \
+	rte_memcpy_func((dst), (src), (n)); })
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+	void *ret = dst;
+
+	/* We can't copy < 16 bytes using XMM registers so do it manually. */
+	if (n < 16) {
+		if (n & 0x01) {
+			*(uint8_t *)dst = *(const uint8_t *)src;
+			dst = (uint8_t *)dst + 1;
+			src = (const uint8_t *)src + 1;
+		}
+		if (n & 0x02) {
+			*(uint16_t *)dst = *(const uint16_t *)src;
+			dst = (uint16_t *)dst + 1;
+			src = (const uint16_t *)src + 1;
+		}
+		if (n & 0x04) {
+			*(uint32_t *)dst = *(const uint32_t *)src;
+			dst = (uint32_t *)dst + 1;
+			src = (const uint32_t *)src + 1;
+		}
+		if (n & 0x08) {
+			/* ARMv7 can not handle unaligned access to long long
+			 * (uint64_t). Therefore two uint32_t operations are
+			 * used.
+			 */
+			*(uint32_t *)dst = *(const uint32_t *)src;
+			dst = (uint32_t *)dst + 1;
+			src = (const uint32_t *)src + 1;
+			*(uint32_t *)dst = *(const uint32_t *)src;
+		}
+		return ret;
+	}
+
+	/* Special fast cases for <= 128 bytes */
+	if (n <= 32) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst - 16 + n,
+			(const uint8_t *)src - 16 + n);
+		return ret;
+	}
+
+	if (n <= 64) {
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov32((uint8_t *)dst - 32 + n,
+			(const uint8_t *)src - 32 + n);
+		return ret;
+	}
+
+	if (n <= 128) {
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov64((uint8_t *)dst - 64 + n,
+			(const uint8_t *)src - 64 + n);
+		return ret;
+	}
+
+	/*
+	 * For large copies > 128 bytes. This combination of 256, 64 and 16 byte
+	 * copies was found to be faster than doing 128 and 32 byte copies as
+	 * well.
+	 */
+	for ( ; n >= 256; n -= 256) {
+		rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+		dst = (uint8_t *)dst + 256;
+		src = (const uint8_t *)src + 256;
+	}
+
+	/*
+	 * We split the remaining bytes (which will be less than 256) into
+	 * 64byte (2^6) chunks.
+	 * Using incrementing integers in the case labels of a switch statement
+	 * enourages the compiler to use a jump table. To get incrementing
+	 * integers, we shift the 2 relevant bits to the LSB position to first
+	 * get decrementing integers, and then subtract.
+	 */
+	switch (3 - (n >> 6)) {
+	case 0x00:
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		n -= 64;
+		dst = (uint8_t *)dst + 64;
+		src = (const uint8_t *)src + 64;      /* fallthrough */
+	case 0x01:
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		n -= 64;
+		dst = (uint8_t *)dst + 64;
+		src = (const uint8_t *)src + 64;      /* fallthrough */
+	case 0x02:
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		n -= 64;
+		dst = (uint8_t *)dst + 64;
+		src = (const uint8_t *)src + 64;      /* fallthrough */
+	default:
+		break;
+	}
+
+	/*
+	 * We split the remaining bytes (which will be less than 64) into
+	 * 16byte (2^4) chunks, using the same switch structure as above.
+	 */
+	switch (3 - (n >> 4)) {
+	case 0x00:
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		n -= 16;
+		dst = (uint8_t *)dst + 16;
+		src = (const uint8_t *)src + 16;      /* fallthrough */
+	case 0x01:
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		n -= 16;
+		dst = (uint8_t *)dst + 16;
+		src = (const uint8_t *)src + 16;      /* fallthrough */
+	case 0x02:
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		n -= 16;
+		dst = (uint8_t *)dst + 16;
+		src = (const uint8_t *)src + 16;      /* fallthrough */
+	default:
+		break;
+	}
+
+	/* Copy any remaining bytes, without going beyond end of buffers */
+	if (n != 0)
+		rte_mov16((uint8_t *)dst - 16 + n,
+			(const uint8_t *)src - 16 + n);
+	return ret;
+}
+
+#else
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 256);
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+	return memcpy(dst, src, n);
+}
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+	return memcpy(dst, src, n);
+}
+
+#endif /* RTE_ARCH_ARM_NEON_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
new file mode 100644
index 00000000..917cdc1b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
@@ -0,0 +1,93 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_ARM64_H_
+#define _RTE_MEMCPY_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 256);
+}
+
+#define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+	return memcpy(dst, src, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_ARM_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
new file mode 100644
index 00000000..aa37de57
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
@@ -0,0 +1,42 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_ARM_H_
+#define _RTE_PREFETCH_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_prefetch_64.h>
+#else
+#include <rte_prefetch_32.h>
+#endif
+
+#endif /* _RTE_PREFETCH_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
new file mode 100644
index 00000000..5aeed22d
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -0,0 +1,67 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_ARM32_H_
+#define _RTE_PREFETCH_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
new file mode 100644
index 00000000..3ed46a46
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -0,0 +1,66 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_ARM_64_H_
+#define _RTE_PREFETCH_ARM_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	asm volatile ("PRFM PLDL2KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_ARM_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
new file mode 100644
index 00000000..664bec88
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
@@ -0,0 +1,40 @@
+/* copied from ppc_64 */
+
+#ifndef _RTE_RWLOCK_ARM_H_
+#define _RTE_RWLOCK_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
new file mode 100644
index 00000000..396a42e8
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
@@ -0,0 +1,92 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SPINLOCK_ARM_H_
+#define _RTE_SPINLOCK_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+static inline int rte_tm_supported(void)
+{
+	return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+	return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+	return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
new file mode 100644
index 00000000..a33c0544
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -0,0 +1,83 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Cavium Networks. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium Networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_ARM_H_
+#define _RTE_VECT_ARM_H_
+
+#include "arm_neon.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int32x4_t xmm_t;
+
+#define	XMM_SIZE	(sizeof(xmm_t))
+#define	XMM_MASK	(XMM_SIZE - 1)
+
+typedef union rte_xmm {
+	xmm_t    x;
+	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
+	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+	double   pd[XMM_SIZE / sizeof(double)];
+} __attribute__((aligned(16))) rte_xmm_t;
+
+#ifdef RTE_ARCH_ARM
+/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
+static __inline uint8x16_t
+vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
+{
+	uint8_t i, pos;
+	rte_xmm_t rte_a, rte_b, rte_ret;
+
+	vst1q_u8(rte_a.u8, a);
+	vst1q_u8(rte_b.u8, b);
+
+	for (i = 0; i < 16; i++) {
+		pos = rte_b.u8[i];
+		if (pos < 16)
+			rte_ret.u8[i] = rte_a.u8[pos];
+		else
+			rte_ret.u8[i] = 0;
+	}
+
+	return vld1q_u8(rte_ret.u8);
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
new file mode 100644
index 00000000..feae4868
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -0,0 +1,432 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_PPC_64_H_
+#define _RTE_ATOMIC_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+#define	rte_mb()  {asm volatile("sync" : : : "memory"); }
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+#define	rte_wmb() {asm volatile("sync" : : : "memory"); }
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+#define	rte_rmb() {asm volatile("sync" : : : "memory"); }
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+/* To be compatible with Power7, use GCC built-in functions for 16 bit
+ * operations */
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
+		__ATOMIC_ACQUIRE) ? 1 : 0;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+	return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+	__atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+	__atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+	return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+	return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+	unsigned int ret = 0;
+
+	asm volatile(
+			"\tlwsync\n"
+			"1:\tlwarx %[ret], 0, %[dst]\n"
+			"cmplw %[exp], %[ret]\n"
+			"bne 2f\n"
+			"stwcx. %[src], 0, %[dst]\n"
+			"bne- 1b\n"
+			"li %[ret], 1\n"
+			"b 3f\n"
+			"2:\n"
+			"stwcx. %[ret], 0, %[dst]\n"
+			"li %[ret], 0\n"
+			"3:\n"
+			"isync\n"
+			: [ret] "=&r" (ret), "=m" (*dst)
+			: [dst] "r" (dst),
+			  [exp] "r" (exp),
+			  [src] "r" (src),
+			  "m" (*dst)
+			: "cc", "memory");
+
+	return ret;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+	return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+	int t;
+
+	asm volatile(
+			"1: lwarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],1\n"
+			"stwcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "=m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+	int t;
+
+	asm volatile(
+			"1: lwarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],-1\n"
+			"stwcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "=m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+	int ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: lwarx %[ret],0,%[cnt]\n"
+			"addic	%[ret],%[ret],1\n"
+			"stwcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return ret == 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+	int ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: lwarx %[ret],0,%[cnt]\n"
+			"addic %[ret],%[ret],-1\n"
+			"stwcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return ret == 0;
+}
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+	unsigned int ret = 0;
+
+	asm volatile (
+			"\tlwsync\n"
+			"1: ldarx %[ret], 0, %[dst]\n"
+			"cmpld %[exp], %[ret]\n"
+			"bne 2f\n"
+			"stdcx. %[src], 0, %[dst]\n"
+			"bne- 1b\n"
+			"li %[ret], 1\n"
+			"b 3f\n"
+			"2:\n"
+			"stdcx. %[ret], 0, %[dst]\n"
+			"li %[ret], 0\n"
+			"3:\n"
+			"isync\n"
+			: [ret] "=&r" (ret), "=m" (*dst)
+			: [dst] "r" (dst),
+			  [exp] "r" (exp),
+			  [src] "r" (src),
+			  "m" (*dst)
+			: "cc", "memory");
+	return ret;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+	v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+	long ret;
+
+	asm volatile("ld%U1%X1 %[ret],%[cnt]"
+		: [ret] "=r"(ret)
+		: [cnt] "m"(v->cnt));
+
+	return ret;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+	asm volatile("std%U0%X0 %[new_value],%[cnt]"
+		: [cnt] "=m"(v->cnt)
+		: [new_value] "r"(new_value));
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"add %[t],%[inc],%[t]\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "=m" (v->cnt)
+			: [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt)
+			: "cc", "memory");
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"subf %[t],%[dec],%[t]\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "+m" (v->cnt)
+			: [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt)
+			: "cc", "memory");
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],1\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "+m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+	long t;
+
+	asm volatile(
+			"1: ldarx %[t],0,%[cnt]\n"
+			"addic %[t],%[t],-1\n"
+			"stdcx. %[t],0,%[cnt]\n"
+			"bne- 1b\n"
+			: [t] "=&r" (t), "+m" (v->cnt)
+			: [cnt] "r" (&v->cnt), "m" (v->cnt)
+			: "cc", "xer", "memory");
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"add %[ret],%[inc],%[ret]\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [inc] "r" (inc), [cnt] "r" (&v->cnt)
+			: "cc", "memory");
+
+	return ret;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"subf %[ret],%[dec],%[ret]\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [dec] "r" (dec), [cnt] "r" (&v->cnt)
+			: "cc", "memory");
+
+	return ret;
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"addic %[ret],%[ret],1\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return ret == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+	long ret;
+
+	asm volatile(
+			"\n\tlwsync\n"
+			"1: ldarx %[ret],0,%[cnt]\n"
+			"addic %[ret],%[ret],-1\n"
+			"stdcx. %[ret],0,%[cnt]\n"
+			"bne- 1b\n"
+			"isync\n"
+			: [ret] "=&r" (ret)
+			: [cnt] "r" (&v->cnt)
+			: "cc", "xer", "memory");
+
+	return ret == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+	v->cnt = 0;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
new file mode 100644
index 00000000..3c1734ed
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
@@ -0,0 +1,149 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California.  All rights reserved.
+*/
+
+#ifndef _RTE_BYTEORDER_PPC_64_H_
+#define _RTE_BYTEORDER_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+	return (_x >> 8) | ((_x << 8) & 0xff00);
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+	return (_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff0000) |
+		((_x << 24) & 0xff000000);
+}
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+  * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+	return (_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) |
+		((_x >> 8) & 0xff000000) | ((_x << 8) & (0xffULL << 32)) |
+		((_x << 24) & (0xffULL << 40)) |
+		((_x << 40) & (0xffULL << 48)) | ((_x << 56));
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap16(x) :		\
+				   rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap32(x) :		\
+				   rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap64(x) :		\
+				   rte_arch_bswap64(x)))
+#else
+/*
+ * __builtin_bswap16 is only available gcc 4.8 and upwards
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap16(x) :		\
+				   rte_arch_bswap16(x)))
+#endif
+#endif
+
+/* Power 8 have both little endian and big endian mode
+ * Power 7 only support big endian
+ */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
new file mode 100644
index 00000000..7cc2b3c5
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h
@@ -0,0 +1,88 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_PPC_64_H_
+#define _RTE_CPUFLAGS_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+	RTE_CPUFLAG_PPC_LE = 0,
+	RTE_CPUFLAG_TRUE_LE,
+	RTE_CPUFLAG_PSERIES_PERFMON_COMPAT,
+	RTE_CPUFLAG_VSX,
+	RTE_CPUFLAG_ARCH_2_06,
+	RTE_CPUFLAG_POWER6_EXT,
+	RTE_CPUFLAG_DFP,
+	RTE_CPUFLAG_PA6T,
+	RTE_CPUFLAG_ARCH_2_05,
+	RTE_CPUFLAG_ICACHE_SNOOP,
+	RTE_CPUFLAG_SMT,
+	RTE_CPUFLAG_BOOKE,
+	RTE_CPUFLAG_CELLBE,
+	RTE_CPUFLAG_POWER5_PLUS,
+	RTE_CPUFLAG_POWER5,
+	RTE_CPUFLAG_POWER4,
+	RTE_CPUFLAG_NOTB,
+	RTE_CPUFLAG_EFP_DOUBLE,
+	RTE_CPUFLAG_EFP_SINGLE,
+	RTE_CPUFLAG_SPE,
+	RTE_CPUFLAG_UNIFIED_CACHE,
+	RTE_CPUFLAG_4xxMAC,
+	RTE_CPUFLAG_MMU,
+	RTE_CPUFLAG_FPU,
+	RTE_CPUFLAG_ALTIVEC,
+	RTE_CPUFLAG_PPC601,
+	RTE_CPUFLAG_PPC64,
+	RTE_CPUFLAG_PPC32,
+	RTE_CPUFLAG_TAR,
+	RTE_CPUFLAG_LSEL,
+	RTE_CPUFLAG_EBB,
+	RTE_CPUFLAG_DSCR,
+	RTE_CPUFLAG_HTM,
+	RTE_CPUFLAG_ARCH_2_07,
+	/* The last item */
+	RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
new file mode 100644
index 00000000..64beddf9
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -0,0 +1,94 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_PPC_64_H_
+#define _RTE_CYCLES_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#include <rte_byteorder.h>
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+	union {
+		uint64_t tsc_64;
+		struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+			uint32_t hi_32;
+			uint32_t lo_32;
+#else
+			uint32_t lo_32;
+			uint32_t hi_32;
+#endif
+		};
+	} tsc;
+	uint32_t tmp;
+
+	asm volatile(
+			"0:\n"
+			"mftbu   %[hi32]\n"
+			"mftb    %[lo32]\n"
+			"mftbu   %[tmp]\n"
+			"cmpw    %[tmp],%[hi32]\n"
+			"bne     0b\n"
+			: [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32),
+			[tmp] "=r"(tmp)
+		    );
+	return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
new file mode 100644
index 00000000..acf7aac2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -0,0 +1,225 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_PPC_64_H_
+#define _RTE_MEMCPY_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/*To include altivec.h, GCC version must  >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+	vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+	vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+	vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+	vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+	vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+	vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+	vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+	vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+	vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+	vec_vsx_st(vec_vsx_ld(64, src), 64, dst);
+	vec_vsx_st(vec_vsx_ld(80, src), 80, dst);
+	vec_vsx_st(vec_vsx_ld(96, src), 96, dst);
+	vec_vsx_st(vec_vsx_ld(112, src), 112, dst);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov128(dst, src);
+	rte_mov128(dst + 128, src + 128);
+}
+
+#define rte_memcpy(dst, src, n)              \
+	({ (__builtin_constant_p(n)) ?       \
+	memcpy((dst), (src), (n)) :          \
+	rte_memcpy_func((dst), (src), (n)); })
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+	void *ret = dst;
+
+	/* We can't copy < 16 bytes using XMM registers so do it manually. */
+	if (n < 16) {
+		if (n & 0x01) {
+			*(uint8_t *)dst = *(const uint8_t *)src;
+			dst = (uint8_t *)dst + 1;
+			src = (const uint8_t *)src + 1;
+		}
+		if (n & 0x02) {
+			*(uint16_t *)dst = *(const uint16_t *)src;
+			dst = (uint16_t *)dst + 1;
+			src = (const uint16_t *)src + 1;
+		}
+		if (n & 0x04) {
+			*(uint32_t *)dst = *(const uint32_t *)src;
+			dst = (uint32_t *)dst + 1;
+			src = (const uint32_t *)src + 1;
+		}
+		if (n & 0x08)
+			*(uint64_t *)dst = *(const uint64_t *)src;
+		return ret;
+	}
+
+	/* Special fast cases for <= 128 bytes */
+	if (n <= 32) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst - 16 + n,
+			(const uint8_t *)src - 16 + n);
+		return ret;
+	}
+
+	if (n <= 64) {
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov32((uint8_t *)dst - 32 + n,
+			(const uint8_t *)src - 32 + n);
+		return ret;
+	}
+
+	if (n <= 128) {
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov64((uint8_t *)dst - 64 + n,
+			(const uint8_t *)src - 64 + n);
+		return ret;
+	}
+
+	/*
+	 * For large copies > 128 bytes. This combination of 256, 64 and 16 byte
+	 * copies was found to be faster than doing 128 and 32 byte copies as
+	 * well.
+	 */
+	for ( ; n >= 256; n -= 256) {
+		rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+		dst = (uint8_t *)dst + 256;
+		src = (const uint8_t *)src + 256;
+	}
+
+	/*
+	 * We split the remaining bytes (which will be less than 256) into
+	 * 64byte (2^6) chunks.
+	 * Using incrementing integers in the case labels of a switch statement
+	 * enourages the compiler to use a jump table. To get incrementing
+	 * integers, we shift the 2 relevant bits to the LSB position to first
+	 * get decrementing integers, and then subtract.
+	 */
+	switch (3 - (n >> 6)) {
+	case 0x00:
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		n -= 64;
+		dst = (uint8_t *)dst + 64;
+		src = (const uint8_t *)src + 64;      /* fallthrough */
+	case 0x01:
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		n -= 64;
+		dst = (uint8_t *)dst + 64;
+		src = (const uint8_t *)src + 64;      /* fallthrough */
+	case 0x02:
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		n -= 64;
+		dst = (uint8_t *)dst + 64;
+		src = (const uint8_t *)src + 64;      /* fallthrough */
+	default:
+		;
+	}
+
+	/*
+	 * We split the remaining bytes (which will be less than 64) into
+	 * 16byte (2^4) chunks, using the same switch structure as above.
+	 */
+	switch (3 - (n >> 4)) {
+	case 0x00:
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		n -= 16;
+		dst = (uint8_t *)dst + 16;
+		src = (const uint8_t *)src + 16;      /* fallthrough */
+	case 0x01:
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		n -= 16;
+		dst = (uint8_t *)dst + 16;
+		src = (const uint8_t *)src + 16;      /* fallthrough */
+	case 0x02:
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		n -= 16;
+		dst = (uint8_t *)dst + 16;
+		src = (const uint8_t *)src + 16;      /* fallthrough */
+	default:
+		;
+	}
+
+	/* Copy any remaining bytes, without going beyond end of buffers */
+	if (n != 0)
+		rte_mov16((uint8_t *)dst - 16 + n,
+			(const uint8_t *)src - 16 + n);
+	return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
new file mode 100644
index 00000000..9a1995ea
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -0,0 +1,67 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_PREFETCH_PPC_64_H_
+#define _RTE_PREFETCH_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
new file mode 100644
index 00000000..de8af19e
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
@@ -0,0 +1,38 @@
+#ifndef _RTE_RWLOCK_PPC_64_H_
+#define _RTE_RWLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
new file mode 100644
index 00000000..af139c9d
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -0,0 +1,114 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_PPC_64_H_
+#define _RTE_SPINLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	while (__sync_lock_test_and_set(&sl->locked, 1))
+		while (sl->locked)
+			rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+	__sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+	return __sync_lock_test_and_set(&sl->locked, 1) == 0;
+}
+
+#endif
+
+static inline int rte_tm_supported(void)
+{
+	return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+	return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+	return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h
new file mode 100644
index 00000000..28825ff6
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_atomic.h
@@ -0,0 +1,92 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_ATOMIC_TILE_H_
+#define _RTE_ATOMIC_TILE_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_mb(void)
+{
+	__sync_synchronize();
+}
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_wmb(void)
+{
+	__sync_synchronize();
+}
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_rmb(void)
+{
+	__sync_synchronize();
+}
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h
new file mode 100644
index 00000000..7239e437
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h
@@ -0,0 +1,91 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_BYTEORDER_TILE_H_
+#define _RTE_BYTEORDER_TILE_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+#define rte_bswap16(x) rte_constant_bswap16(x)
+#endif
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h
new file mode 100644
index 00000000..1849b520
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h
@@ -0,0 +1,53 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CPUFLAGS_TILE_H_
+#define _RTE_CPUFLAGS_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+	RTE_CPUFLAG_NUMFLAGS /**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_cycles.h b/lib/librte_eal/common/include/arch/tile/rte_cycles.h
new file mode 100644
index 00000000..0b2200a3
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_cycles.h
@@ -0,0 +1,70 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_CYCLES_TILE_H_
+#define _RTE_CYCLES_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <arch/cycle.h>
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+	return get_cycle_count();
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
new file mode 100644
index 00000000..9b5b37ef
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
@@ -0,0 +1,93 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCPY_TILE_H_
+#define _RTE_MEMCPY_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#include "generic/rte_memcpy.h"
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	memcpy(dst, src, 256);
+}
+
+#define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+	return memcpy(dst, src, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
new file mode 100644
index 00000000..7a1bb93e
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
@@ -0,0 +1,67 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_PREFETCH_TILE_H_
+#define _RTE_PREFETCH_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	__builtin_prefetch((const void *)(uintptr_t)p, 0, 3);
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	__builtin_prefetch((const void *)(uintptr_t)p, 0, 2);
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	__builtin_prefetch((const void *)(uintptr_t)p, 0, 1);
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h
new file mode 100644
index 00000000..8f67a190
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h
@@ -0,0 +1,70 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_RWLOCK_TILE_H_
+#define _RTE_RWLOCK_TILE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+	rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h
new file mode 100644
index 00000000..e91f99ee
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h
@@ -0,0 +1,92 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_SPINLOCK_TILE_H_
+#define _RTE_SPINLOCK_TILE_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+static inline int rte_tm_supported(void)
+{
+	return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+	return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+	rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+	return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
new file mode 100644
index 00000000..b20056b8
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -0,0 +1,222 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_X86_H_
+#define _RTE_ATOMIC_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <emmintrin.h>
+#include "generic/rte_atomic.h"
+
+#if RTE_MAX_LCORE == 1
+#define MPLOCKED                        /**< No need to insert MP lock prefix. */
+#else
+#define MPLOCKED        "lock ; "       /**< Insert MP lock prefix. */
+#endif
+
+#define	rte_mb() _mm_mfence()
+
+#define	rte_wmb() _mm_sfence()
+
+#define	rte_rmb() _mm_lfence()
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+	uint8_t res;
+
+	asm volatile(
+			MPLOCKED
+			"cmpxchgw %[src], %[dst];"
+			"sete %[res];"
+			: [res] "=a" (res),     /* output */
+			  [dst] "=m" (*dst)
+			: [src] "r" (src),      /* input */
+			  "a" (exp),
+			  "m" (*dst)
+			: "memory");            /* no-clobber list */
+	return res;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+	return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+	asm volatile(
+			MPLOCKED
+			"incw %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: "m" (v->cnt)          /* input */
+			);
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+	asm volatile(
+			MPLOCKED
+			"decw %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: "m" (v->cnt)          /* input */
+			);
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+	uint8_t ret;
+
+	asm volatile(
+			MPLOCKED
+			"incw %[cnt] ; "
+			"sete %[ret]"
+			: [cnt] "+m" (v->cnt),  /* output */
+			  [ret] "=qm" (ret)
+			);
+	return ret != 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+	uint8_t ret;
+
+	asm volatile(MPLOCKED
+			"decw %[cnt] ; "
+			"sete %[ret]"
+			: [cnt] "+m" (v->cnt),  /* output */
+			  [ret] "=qm" (ret)
+			);
+	return ret != 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+	uint8_t res;
+
+	asm volatile(
+			MPLOCKED
+			"cmpxchgl %[src], %[dst];"
+			"sete %[res];"
+			: [res] "=a" (res),     /* output */
+			  [dst] "=m" (*dst)
+			: [src] "r" (src),      /* input */
+			  "a" (exp),
+			  "m" (*dst)
+			: "memory");            /* no-clobber list */
+	return res;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+	return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+	asm volatile(
+			MPLOCKED
+			"incl %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: "m" (v->cnt)          /* input */
+			);
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+	asm volatile(
+			MPLOCKED
+			"decl %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: "m" (v->cnt)          /* input */
+			);
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+	uint8_t ret;
+
+	asm volatile(
+			MPLOCKED
+			"incl %[cnt] ; "
+			"sete %[ret]"
+			: [cnt] "+m" (v->cnt),  /* output */
+			  [ret] "=qm" (ret)
+			);
+	return ret != 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+	uint8_t ret;
+
+	asm volatile(MPLOCKED
+			"decl %[cnt] ; "
+			"sete %[ret]"
+			: [cnt] "+m" (v->cnt),  /* output */
+			  [ret] "=qm" (ret)
+			);
+	return ret != 0;
+}
+#endif
+
+#ifdef RTE_ARCH_I686
+#include "rte_atomic_32.h"
+#else
+#include "rte_atomic_64.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
new file mode 100644
index 00000000..400d8a96
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -0,0 +1,222 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Inspired from FreeBSD src/sys/i386/include/atomic.h
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_I686_H_
+#define _RTE_ATOMIC_I686_H_
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+	uint8_t res;
+	union {
+		struct {
+			uint32_t l32;
+			uint32_t h32;
+		};
+		uint64_t u64;
+	} _exp, _src;
+
+	_exp.u64 = exp;
+	_src.u64 = src;
+
+#ifndef __PIC__
+    asm volatile (
+            MPLOCKED
+            "cmpxchg8b (%[dst]);"
+            "setz %[res];"
+            : [res] "=a" (res)      /* result in eax */
+            : [dst] "S" (dst),      /* esi */
+             "b" (_src.l32),       /* ebx */
+             "c" (_src.h32),       /* ecx */
+             "a" (_exp.l32),       /* eax */
+             "d" (_exp.h32)        /* edx */
+			: "memory" );           /* no-clobber list */
+#else
+	asm volatile (
+            "mov %%ebx, %%edi\n"
+			MPLOCKED
+			"cmpxchg8b (%[dst]);"
+			"setz %[res];"
+            "xchgl %%ebx, %%edi;\n"
+			: [res] "=a" (res)      /* result in eax */
+			: [dst] "S" (dst),      /* esi */
+			  "D" (_src.l32),       /* ebx */
+			  "c" (_src.h32),       /* ecx */
+			  "a" (_exp.l32),       /* eax */
+			  "d" (_exp.h32)        /* edx */
+			: "memory" );           /* no-clobber list */
+#endif
+
+	return res;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, 0);
+	}
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		/* replace the value by itself */
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, tmp);
+	}
+	return tmp;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, new_value);
+	}
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, tmp + inc);
+	}
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, tmp - dec);
+	}
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+	rte_atomic64_add(v, 1);
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+	rte_atomic64_sub(v, 1);
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, tmp + inc);
+	}
+
+	return tmp + inc;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, tmp - dec);
+	}
+
+	return tmp - dec;
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+	return rte_atomic64_add_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+	return rte_atomic64_sub_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+	rte_atomic64_set(v, 0);
+}
+#endif
+
+#endif /* _RTE_ATOMIC_I686_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
new file mode 100644
index 00000000..4de66000
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -0,0 +1,191 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Inspired from FreeBSD src/sys/amd64/include/atomic.h
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_X86_64_H_
+#define _RTE_ATOMIC_X86_64_H_
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+	uint8_t res;
+
+
+	asm volatile(
+			MPLOCKED
+			"cmpxchgq %[src], %[dst];"
+			"sete %[res];"
+			: [res] "=a" (res),     /* output */
+			  [dst] "=m" (*dst)
+			: [src] "r" (src),      /* input */
+			  "a" (exp),
+			  "m" (*dst)
+			: "memory");            /* no-clobber list */
+
+	return res;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+	v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+	return v->cnt;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+	v->cnt = new_value;
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+	asm volatile(
+			MPLOCKED
+			"addq %[inc], %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: [inc] "ir" (inc),     /* input */
+			  "m" (v->cnt)
+			);
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+	asm volatile(
+			MPLOCKED
+			"subq %[dec], %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: [dec] "ir" (dec),     /* input */
+			  "m" (v->cnt)
+			);
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+	asm volatile(
+			MPLOCKED
+			"incq %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: "m" (v->cnt)          /* input */
+			);
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+	asm volatile(
+			MPLOCKED
+			"decq %[cnt]"
+			: [cnt] "=m" (v->cnt)   /* output */
+			: "m" (v->cnt)          /* input */
+			);
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+	int64_t prev = inc;
+
+	asm volatile(
+			MPLOCKED
+			"xaddq %[prev], %[cnt]"
+			: [prev] "+r" (prev),   /* output */
+			  [cnt] "=m" (v->cnt)
+			: "m" (v->cnt)          /* input */
+			);
+	return prev + inc;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+	return rte_atomic64_add_return(v, -dec);
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+	uint8_t ret;
+
+	asm volatile(
+			MPLOCKED
+			"incq %[cnt] ; "
+			"sete %[ret]"
+			: [cnt] "+m" (v->cnt), /* output */
+			  [ret] "=qm" (ret)
+			);
+
+	return ret != 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+	uint8_t ret;
+
+	asm volatile(
+			MPLOCKED
+			"decq %[cnt] ; "
+			"sete %[ret]"
+			: [cnt] "+m" (v->cnt),  /* output */
+			  [ret] "=qm" (ret)
+			);
+	return ret != 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+	v->cnt = 0;
+}
+#endif
+
+#endif /* _RTE_ATOMIC_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
new file mode 100644
index 00000000..ffdb6ef5
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
@@ -0,0 +1,125 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_X86_H_
+#define _RTE_BYTEORDER_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_byteorder.h"
+
+#ifndef RTE_BYTE_ORDER
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+	register uint16_t x = _x;
+	asm volatile ("xchgb %b[x1],%h[x2]"
+		      : [x1] "=Q" (x)
+		      : [x2] "0" (x)
+		      );
+	return x;
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+	register uint32_t x = _x;
+	asm volatile ("bswap %[x]"
+		      : [x] "+r" (x)
+		      );
+	return x;
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap16(x) :		\
+				   rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap32(x) :		\
+				   rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap64(x) :		\
+				   rte_arch_bswap64(x)))
+#else
+/*
+ * __builtin_bswap16 is only available gcc 4.8 and upwards
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ?		\
+				   rte_constant_bswap16(x) :		\
+				   rte_arch_bswap16(x)))
+#endif
+#endif
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#ifdef RTE_ARCH_I686
+#include "rte_byteorder_32.h"
+#else
+#include "rte_byteorder_64.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
new file mode 100644
index 00000000..51c306f8
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
@@ -0,0 +1,51 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_I686_H_
+#define _RTE_BYTEORDER_I686_H_
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+  * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* Compat./Leg. mode */
+static inline uint64_t rte_arch_bswap64(uint64_t x)
+{
+	uint64_t ret = 0;
+	ret |= ((uint64_t)rte_arch_bswap32(x & 0xffffffffUL) << 32);
+	ret |= ((uint64_t)rte_arch_bswap32((x >> 32) & 0xffffffffUL));
+	return ret;
+}
+
+#endif /* _RTE_BYTEORDER_I686_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
new file mode 100644
index 00000000..dda572bd
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
@@ -0,0 +1,52 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_X86_64_H_
+#define _RTE_BYTEORDER_X86_64_H_
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+  * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+	register uint64_t x = _x;
+	asm volatile ("bswap %[x]"
+		      : [x] "+r" (x)
+		      );
+	return x;
+}
+
+#endif /* _RTE_BYTEORDER_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h
new file mode 100644
index 00000000..26204fab
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_X86_64_H_
+#define _RTE_CPUFLAGS_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rte_cpu_flag_t {
+	/* (EAX 01h) ECX features*/
+	RTE_CPUFLAG_SSE3 = 0,               /**< SSE3 */
+	RTE_CPUFLAG_PCLMULQDQ,              /**< PCLMULQDQ */
+	RTE_CPUFLAG_DTES64,                 /**< DTES64 */
+	RTE_CPUFLAG_MONITOR,                /**< MONITOR */
+	RTE_CPUFLAG_DS_CPL,                 /**< DS_CPL */
+	RTE_CPUFLAG_VMX,                    /**< VMX */
+	RTE_CPUFLAG_SMX,                    /**< SMX */
+	RTE_CPUFLAG_EIST,                   /**< EIST */
+	RTE_CPUFLAG_TM2,                    /**< TM2 */
+	RTE_CPUFLAG_SSSE3,                  /**< SSSE3 */
+	RTE_CPUFLAG_CNXT_ID,                /**< CNXT_ID */
+	RTE_CPUFLAG_FMA,                    /**< FMA */
+	RTE_CPUFLAG_CMPXCHG16B,             /**< CMPXCHG16B */
+	RTE_CPUFLAG_XTPR,                   /**< XTPR */
+	RTE_CPUFLAG_PDCM,                   /**< PDCM */
+	RTE_CPUFLAG_PCID,                   /**< PCID */
+	RTE_CPUFLAG_DCA,                    /**< DCA */
+	RTE_CPUFLAG_SSE4_1,                 /**< SSE4_1 */
+	RTE_CPUFLAG_SSE4_2,                 /**< SSE4_2 */
+	RTE_CPUFLAG_X2APIC,                 /**< X2APIC */
+	RTE_CPUFLAG_MOVBE,                  /**< MOVBE */
+	RTE_CPUFLAG_POPCNT,                 /**< POPCNT */
+	RTE_CPUFLAG_TSC_DEADLINE,           /**< TSC_DEADLINE */
+	RTE_CPUFLAG_AES,                    /**< AES */
+	RTE_CPUFLAG_XSAVE,                  /**< XSAVE */
+	RTE_CPUFLAG_OSXSAVE,                /**< OSXSAVE */
+	RTE_CPUFLAG_AVX,                    /**< AVX */
+	RTE_CPUFLAG_F16C,                   /**< F16C */
+	RTE_CPUFLAG_RDRAND,                 /**< RDRAND */
+
+	/* (EAX 01h) EDX features */
+	RTE_CPUFLAG_FPU,                    /**< FPU */
+	RTE_CPUFLAG_VME,                    /**< VME */
+	RTE_CPUFLAG_DE,                     /**< DE */
+	RTE_CPUFLAG_PSE,                    /**< PSE */
+	RTE_CPUFLAG_TSC,                    /**< TSC */
+	RTE_CPUFLAG_MSR,                    /**< MSR */
+	RTE_CPUFLAG_PAE,                    /**< PAE */
+	RTE_CPUFLAG_MCE,                    /**< MCE */
+	RTE_CPUFLAG_CX8,                    /**< CX8 */
+	RTE_CPUFLAG_APIC,                   /**< APIC */
+	RTE_CPUFLAG_SEP,                    /**< SEP */
+	RTE_CPUFLAG_MTRR,                   /**< MTRR */
+	RTE_CPUFLAG_PGE,                    /**< PGE */
+	RTE_CPUFLAG_MCA,                    /**< MCA */
+	RTE_CPUFLAG_CMOV,                   /**< CMOV */
+	RTE_CPUFLAG_PAT,                    /**< PAT */
+	RTE_CPUFLAG_PSE36,                  /**< PSE36 */
+	RTE_CPUFLAG_PSN,                    /**< PSN */
+	RTE_CPUFLAG_CLFSH,                  /**< CLFSH */
+	RTE_CPUFLAG_DS,                     /**< DS */
+	RTE_CPUFLAG_ACPI,                   /**< ACPI */
+	RTE_CPUFLAG_MMX,                    /**< MMX */
+	RTE_CPUFLAG_FXSR,                   /**< FXSR */
+	RTE_CPUFLAG_SSE,                    /**< SSE */
+	RTE_CPUFLAG_SSE2,                   /**< SSE2 */
+	RTE_CPUFLAG_SS,                     /**< SS */
+	RTE_CPUFLAG_HTT,                    /**< HTT */
+	RTE_CPUFLAG_TM,                     /**< TM */
+	RTE_CPUFLAG_PBE,                    /**< PBE */
+
+	/* (EAX 06h) EAX features */
+	RTE_CPUFLAG_DIGTEMP,                /**< DIGTEMP */
+	RTE_CPUFLAG_TRBOBST,                /**< TRBOBST */
+	RTE_CPUFLAG_ARAT,                   /**< ARAT */
+	RTE_CPUFLAG_PLN,                    /**< PLN */
+	RTE_CPUFLAG_ECMD,                   /**< ECMD */
+	RTE_CPUFLAG_PTM,                    /**< PTM */
+
+	/* (EAX 06h) ECX features */
+	RTE_CPUFLAG_MPERF_APERF_MSR,        /**< MPERF_APERF_MSR */
+	RTE_CPUFLAG_ACNT2,                  /**< ACNT2 */
+	RTE_CPUFLAG_ENERGY_EFF,             /**< ENERGY_EFF */
+
+	/* (EAX 07h, ECX 0h) EBX features */
+	RTE_CPUFLAG_FSGSBASE,               /**< FSGSBASE */
+	RTE_CPUFLAG_BMI1,                   /**< BMI1 */
+	RTE_CPUFLAG_HLE,                    /**< Hardware Lock elision */
+	RTE_CPUFLAG_AVX2,                   /**< AVX2 */
+	RTE_CPUFLAG_SMEP,                   /**< SMEP */
+	RTE_CPUFLAG_BMI2,                   /**< BMI2 */
+	RTE_CPUFLAG_ERMS,                   /**< ERMS */
+	RTE_CPUFLAG_INVPCID,                /**< INVPCID */
+	RTE_CPUFLAG_RTM,                    /**< Transactional memory */
+	RTE_CPUFLAG_AVX512F,                /**< AVX512F */
+
+	/* (EAX 80000001h) ECX features */
+	RTE_CPUFLAG_LAHF_SAHF,              /**< LAHF_SAHF */
+	RTE_CPUFLAG_LZCNT,                  /**< LZCNT */
+
+	/* (EAX 80000001h) EDX features */
+	RTE_CPUFLAG_SYSCALL,                /**< SYSCALL */
+	RTE_CPUFLAG_XD,                     /**< XD */
+	RTE_CPUFLAG_1GB_PG,                 /**< 1GB_PG */
+	RTE_CPUFLAG_RDTSCP,                 /**< RDTSCP */
+	RTE_CPUFLAG_EM64T,                  /**< EM64T */
+
+	/* (EAX 80000007h) EDX features */
+	RTE_CPUFLAG_INVTSC,                 /**< INVTSC */
+
+	/* The last item */
+	RTE_CPUFLAG_NUMFLAGS,               /**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
new file mode 100644
index 00000000..6e3c7d89
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
@@ -0,0 +1,121 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*   BSD LICENSE
+ *
+ *   Copyright(c) 2013 6WIND.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_X86_64_H_
+#define _RTE_CYCLES_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+/* Global switch to use VMWARE mapping of TSC instead of RDTSC */
+extern int rte_cycles_vmware_tsc_map;
+#include <rte_branch_prediction.h>
+#endif
+
+static inline uint64_t
+rte_rdtsc(void)
+{
+	union {
+		uint64_t tsc_64;
+		struct {
+			uint32_t lo_32;
+			uint32_t hi_32;
+		};
+	} tsc;
+
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+	if (unlikely(rte_cycles_vmware_tsc_map)) {
+		/* ecx = 0x10000 corresponds to the physical TSC for VMware */
+		asm volatile("rdpmc" :
+		             "=a" (tsc.lo_32),
+		             "=d" (tsc.hi_32) :
+		             "c"(0x10000));
+		return tsc.tsc_64;
+	}
+#endif
+
+	asm volatile("rdtsc" :
+		     "=a" (tsc.lo_32),
+		     "=d" (tsc.hi_32));
+	return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+	rte_mb();
+	return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
new file mode 100644
index 00000000..f463ab30
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -0,0 +1,884 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_X86_64_H_
+#define _RTE_MEMCPY_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so it's address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ * @param n
+ *   Number of bytes to copy.
+ * @return
+ *   Pointer to the destination data.
+ */
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+
+/**
+ * AVX512 implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	__m128i xmm0;
+
+	xmm0 = _mm_loadu_si128((const __m128i *)src);
+	_mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	__m256i ymm0;
+
+	ymm0 = _mm256_loadu_si256((const __m256i *)src);
+	_mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	__m512i zmm0;
+
+	zmm0 = _mm512_loadu_si512((const void *)src);
+	_mm512_storeu_si512((void *)dst, zmm0);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov64(dst + 0 * 64, src + 0 * 64);
+	rte_mov64(dst + 1 * 64, src + 1 * 64);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov64(dst + 0 * 64, src + 0 * 64);
+	rte_mov64(dst + 1 * 64, src + 1 * 64);
+	rte_mov64(dst + 2 * 64, src + 2 * 64);
+	rte_mov64(dst + 3 * 64, src + 3 * 64);
+}
+
+/**
+ * Copy 128-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+	__m512i zmm0, zmm1;
+
+	while (n >= 128) {
+		zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64));
+		n -= 128;
+		zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64));
+		src = src + 128;
+		_mm512_storeu_si512((void *)(dst + 0 * 64), zmm0);
+		_mm512_storeu_si512((void *)(dst + 1 * 64), zmm1);
+		dst = dst + 128;
+	}
+}
+
+/**
+ * Copy 512-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+	__m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
+
+	while (n >= 512) {
+		zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64));
+		n -= 512;
+		zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64));
+		zmm2 = _mm512_loadu_si512((const void *)(src + 2 * 64));
+		zmm3 = _mm512_loadu_si512((const void *)(src + 3 * 64));
+		zmm4 = _mm512_loadu_si512((const void *)(src + 4 * 64));
+		zmm5 = _mm512_loadu_si512((const void *)(src + 5 * 64));
+		zmm6 = _mm512_loadu_si512((const void *)(src + 6 * 64));
+		zmm7 = _mm512_loadu_si512((const void *)(src + 7 * 64));
+		src = src + 512;
+		_mm512_storeu_si512((void *)(dst + 0 * 64), zmm0);
+		_mm512_storeu_si512((void *)(dst + 1 * 64), zmm1);
+		_mm512_storeu_si512((void *)(dst + 2 * 64), zmm2);
+		_mm512_storeu_si512((void *)(dst + 3 * 64), zmm3);
+		_mm512_storeu_si512((void *)(dst + 4 * 64), zmm4);
+		_mm512_storeu_si512((void *)(dst + 5 * 64), zmm5);
+		_mm512_storeu_si512((void *)(dst + 6 * 64), zmm6);
+		_mm512_storeu_si512((void *)(dst + 7 * 64), zmm7);
+		dst = dst + 512;
+	}
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+	uintptr_t dstu = (uintptr_t)dst;
+	uintptr_t srcu = (uintptr_t)src;
+	void *ret = dst;
+	size_t dstofss;
+	size_t bits;
+
+	/**
+	 * Copy less than 16 bytes
+	 */
+	if (n < 16) {
+		if (n & 0x01) {
+			*(uint8_t *)dstu = *(const uint8_t *)srcu;
+			srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+			dstu = (uintptr_t)((uint8_t *)dstu + 1);
+		}
+		if (n & 0x02) {
+			*(uint16_t *)dstu = *(const uint16_t *)srcu;
+			srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+			dstu = (uintptr_t)((uint16_t *)dstu + 1);
+		}
+		if (n & 0x04) {
+			*(uint32_t *)dstu = *(const uint32_t *)srcu;
+			srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+			dstu = (uintptr_t)((uint32_t *)dstu + 1);
+		}
+		if (n & 0x08)
+			*(uint64_t *)dstu = *(const uint64_t *)srcu;
+		return ret;
+	}
+
+	/**
+	 * Fast way when copy size doesn't exceed 512 bytes
+	 */
+	if (n <= 32) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst - 16 + n,
+				  (const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 64) {
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov32((uint8_t *)dst - 32 + n,
+				  (const uint8_t *)src - 32 + n);
+		return ret;
+	}
+	if (n <= 512) {
+		if (n >= 256) {
+			n -= 256;
+			rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 256;
+			dst = (uint8_t *)dst + 256;
+		}
+		if (n >= 128) {
+			n -= 128;
+			rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 128;
+			dst = (uint8_t *)dst + 128;
+		}
+COPY_BLOCK_128_BACK63:
+		if (n > 64) {
+			rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+			rte_mov64((uint8_t *)dst - 64 + n,
+					  (const uint8_t *)src - 64 + n);
+			return ret;
+		}
+		if (n > 0)
+			rte_mov64((uint8_t *)dst - 64 + n,
+					  (const uint8_t *)src - 64 + n);
+		return ret;
+	}
+
+	/**
+	 * Make store aligned when copy size exceeds 512 bytes
+	 */
+	dstofss = ((uintptr_t)dst & 0x3F);
+	if (dstofss > 0) {
+		dstofss = 64 - dstofss;
+		n -= dstofss;
+		rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+		src = (const uint8_t *)src + dstofss;
+		dst = (uint8_t *)dst + dstofss;
+	}
+
+	/**
+	 * Copy 512-byte blocks.
+	 * Use copy block function for better instruction order control,
+	 * which is important when load is unaligned.
+	 */
+	rte_mov512blocks((uint8_t *)dst, (const uint8_t *)src, n);
+	bits = n;
+	n = n & 511;
+	bits -= n;
+	src = (const uint8_t *)src + bits;
+	dst = (uint8_t *)dst + bits;
+
+	/**
+	 * Copy 128-byte blocks.
+	 * Use copy block function for better instruction order control,
+	 * which is important when load is unaligned.
+	 */
+	if (n >= 128) {
+		rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n);
+		bits = n;
+		n = n & 127;
+		bits -= n;
+		src = (const uint8_t *)src + bits;
+		dst = (uint8_t *)dst + bits;
+	}
+
+	/**
+	 * Copy whatever left
+	 */
+	goto COPY_BLOCK_128_BACK63;
+}
+
+#elif defined RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * AVX2 implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	__m128i xmm0;
+
+	xmm0 = _mm_loadu_si128((const __m128i *)src);
+	_mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	__m256i ymm0;
+
+	ymm0 = _mm256_loadu_si256((const __m256i *)src);
+	_mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+	rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+	rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+	rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
+	rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+	rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+	rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
+	rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
+	rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32);
+	rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32);
+	rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32);
+	rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32);
+}
+
+/**
+ * Copy 64-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+	__m256i ymm0, ymm1;
+
+	while (n >= 64) {
+		ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
+		n -= 64;
+		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
+		src = (const uint8_t *)src + 64;
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
+		dst = (uint8_t *)dst + 64;
+	}
+}
+
+/**
+ * Copy 256-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+	__m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
+
+	while (n >= 256) {
+		ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
+		n -= 256;
+		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
+		ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
+		ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
+		ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32));
+		ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32));
+		ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32));
+		ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32));
+		src = (const uint8_t *)src + 256;
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6);
+		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7);
+		dst = (uint8_t *)dst + 256;
+	}
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+	uintptr_t dstu = (uintptr_t)dst;
+	uintptr_t srcu = (uintptr_t)src;
+	void *ret = dst;
+	size_t dstofss;
+	size_t bits;
+
+	/**
+	 * Copy less than 16 bytes
+	 */
+	if (n < 16) {
+		if (n & 0x01) {
+			*(uint8_t *)dstu = *(const uint8_t *)srcu;
+			srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+			dstu = (uintptr_t)((uint8_t *)dstu + 1);
+		}
+		if (n & 0x02) {
+			*(uint16_t *)dstu = *(const uint16_t *)srcu;
+			srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+			dstu = (uintptr_t)((uint16_t *)dstu + 1);
+		}
+		if (n & 0x04) {
+			*(uint32_t *)dstu = *(const uint32_t *)srcu;
+			srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+			dstu = (uintptr_t)((uint32_t *)dstu + 1);
+		}
+		if (n & 0x08) {
+			*(uint64_t *)dstu = *(const uint64_t *)srcu;
+		}
+		return ret;
+	}
+
+	/**
+	 * Fast way when copy size doesn't exceed 512 bytes
+	 */
+	if (n <= 32) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 64) {
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+		return ret;
+	}
+	if (n <= 512) {
+		if (n >= 256) {
+			n -= 256;
+			rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 256;
+			dst = (uint8_t *)dst + 256;
+		}
+		if (n >= 128) {
+			n -= 128;
+			rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 128;
+			dst = (uint8_t *)dst + 128;
+		}
+		if (n >= 64) {
+			n -= 64;
+			rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 64;
+			dst = (uint8_t *)dst + 64;
+		}
+COPY_BLOCK_64_BACK31:
+		if (n > 32) {
+			rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+			rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+			return ret;
+		}
+		if (n > 0) {
+			rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+		}
+		return ret;
+	}
+
+	/**
+	 * Make store aligned when copy size exceeds 512 bytes
+	 */
+	dstofss = (uintptr_t)dst & 0x1F;
+	if (dstofss > 0) {
+		dstofss = 32 - dstofss;
+		n -= dstofss;
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		src = (const uint8_t *)src + dstofss;
+		dst = (uint8_t *)dst + dstofss;
+	}
+
+	/**
+	 * Copy 256-byte blocks.
+	 * Use copy block function for better instruction order control,
+	 * which is important when load is unaligned.
+	 */
+	rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n);
+	bits = n;
+	n = n & 255;
+	bits -= n;
+	src = (const uint8_t *)src + bits;
+	dst = (uint8_t *)dst + bits;
+
+	/**
+	 * Copy 64-byte blocks.
+	 * Use copy block function for better instruction order control,
+	 * which is important when load is unaligned.
+	 */
+	if (n >= 64) {
+		rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n);
+		bits = n;
+		n = n & 63;
+		bits -= n;
+		src = (const uint8_t *)src + bits;
+		dst = (uint8_t *)dst + bits;
+	}
+
+	/**
+	 * Copy whatever left
+	 */
+	goto COPY_BLOCK_64_BACK31;
+}
+
+#else /* RTE_MACHINE_CPUFLAG */
+
+/**
+ * SSE & AVX implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+	__m128i xmm0;
+
+	xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src);
+	_mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+	rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+	rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+	rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+	rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+	rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+	rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+	rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+	rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16);
+	rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16);
+	rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16);
+	rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+	rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+	rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+	rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+	rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+	rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16);
+	rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16);
+	rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16);
+	rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16);
+	rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16);
+	rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16);
+	rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16);
+	rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16);
+	rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16);
+	rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16);
+	rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16);
+	rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16);
+}
+
+/**
+ * Macro for copying unaligned block from one location to another with constant load offset,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be immediate value within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> must be pre-defined
+ */
+#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset)                                                     \
+({                                                                                                          \
+    int tmp;                                                                                                \
+    while (len >= 128 + 16 - offset) {                                                                      \
+        xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16));                  \
+        len -= 128;                                                                                         \
+        xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16));                  \
+        xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16));                  \
+        xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16));                  \
+        xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16));                  \
+        xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16));                  \
+        xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16));                  \
+        xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16));                  \
+        xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16));                  \
+        src = (const uint8_t *)src + 128;                                                                   \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset));        \
+        _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset));        \
+        dst = (uint8_t *)dst + 128;                                                                         \
+    }                                                                                                       \
+    tmp = len;                                                                                              \
+    len = ((len - 16 + offset) & 127) + 16 - offset;                                                        \
+    tmp -= len;                                                                                             \
+    src = (const uint8_t *)src + tmp;                                                                       \
+    dst = (uint8_t *)dst + tmp;                                                                             \
+    if (len >= 32 + 16 - offset) {                                                                          \
+        while (len >= 32 + 16 - offset) {                                                                   \
+            xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16));              \
+            len -= 32;                                                                                      \
+            xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16));              \
+            xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16));              \
+            src = (const uint8_t *)src + 32;                                                                \
+            _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset));    \
+            _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset));    \
+            dst = (uint8_t *)dst + 32;                                                                      \
+        }                                                                                                   \
+        tmp = len;                                                                                          \
+        len = ((len - 16 + offset) & 31) + 16 - offset;                                                     \
+        tmp -= len;                                                                                         \
+        src = (const uint8_t *)src + tmp;                                                                   \
+        dst = (uint8_t *)dst + tmp;                                                                         \
+    }                                                                                                       \
+})
+
+/**
+ * Macro for copying unaligned block from one location to another,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Use switch here because the aligning instruction requires immediate value for shift count.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined
+ */
+#define MOVEUNALIGNED_LEFT47(dst, src, len, offset)                   \
+({                                                                    \
+    switch (offset) {                                                 \
+    case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break;    \
+    case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break;    \
+    case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break;    \
+    case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break;    \
+    case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break;    \
+    case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break;    \
+    case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break;    \
+    case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break;    \
+    case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break;    \
+    case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break;    \
+    case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break;    \
+    case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break;    \
+    case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break;    \
+    case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break;    \
+    case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break;    \
+    default:;                                                         \
+    }                                                                 \
+})
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+	__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
+	uintptr_t dstu = (uintptr_t)dst;
+	uintptr_t srcu = (uintptr_t)src;
+	void *ret = dst;
+	size_t dstofss;
+	size_t srcofs;
+
+	/**
+	 * Copy less than 16 bytes
+	 */
+	if (n < 16) {
+		if (n & 0x01) {
+			*(uint8_t *)dstu = *(const uint8_t *)srcu;
+			srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+			dstu = (uintptr_t)((uint8_t *)dstu + 1);
+		}
+		if (n & 0x02) {
+			*(uint16_t *)dstu = *(const uint16_t *)srcu;
+			srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+			dstu = (uintptr_t)((uint16_t *)dstu + 1);
+		}
+		if (n & 0x04) {
+			*(uint32_t *)dstu = *(const uint32_t *)srcu;
+			srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+			dstu = (uintptr_t)((uint32_t *)dstu + 1);
+		}
+		if (n & 0x08) {
+			*(uint64_t *)dstu = *(const uint64_t *)srcu;
+		}
+		return ret;
+	}
+
+	/**
+	 * Fast way when copy size doesn't exceed 512 bytes
+	 */
+	if (n <= 32) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 48) {
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 64) {
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32);
+		rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 128) {
+		goto COPY_BLOCK_128_BACK15;
+	}
+	if (n <= 512) {
+		if (n >= 256) {
+			n -= 256;
+			rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+			rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128);
+			src = (const uint8_t *)src + 256;
+			dst = (uint8_t *)dst + 256;
+		}
+COPY_BLOCK_255_BACK15:
+		if (n >= 128) {
+			n -= 128;
+			rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 128;
+			dst = (uint8_t *)dst + 128;
+		}
+COPY_BLOCK_128_BACK15:
+		if (n >= 64) {
+			n -= 64;
+			rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 64;
+			dst = (uint8_t *)dst + 64;
+		}
+COPY_BLOCK_64_BACK15:
+		if (n >= 32) {
+			n -= 32;
+			rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+			src = (const uint8_t *)src + 32;
+			dst = (uint8_t *)dst + 32;
+		}
+		if (n > 16) {
+			rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+			rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+			return ret;
+		}
+		if (n > 0) {
+			rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		}
+		return ret;
+	}
+
+	/**
+	 * Make store aligned when copy size exceeds 512 bytes,
+	 * and make sure the first 15 bytes are copied, because
+	 * unaligned copy functions require up to 15 bytes
+	 * backwards access.
+	 */
+	dstofss = (uintptr_t)dst & 0x0F;
+	if (dstofss > 0) {
+		dstofss = 16 - dstofss + 16;
+		n -= dstofss;
+		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+		src = (const uint8_t *)src + dstofss;
+		dst = (uint8_t *)dst + dstofss;
+	}
+	srcofs = ((uintptr_t)src & 0x0F);
+
+	/**
+	 * For aligned copy
+	 */
+	if (srcofs == 0) {
+		/**
+		 * Copy 256-byte blocks
+		 */
+		for (; n >= 256; n -= 256) {
+			rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+			dst = (uint8_t *)dst + 256;
+			src = (const uint8_t *)src + 256;
+		}
+
+		/**
+		 * Copy whatever left
+		 */
+		goto COPY_BLOCK_255_BACK15;
+	}
+
+	/**
+	 * For copy with unaligned load
+	 */
+	MOVEUNALIGNED_LEFT47(dst, src, n, srcofs);
+
+	/**
+	 * Copy whatever left
+	 */
+	goto COPY_BLOCK_64_BACK15;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
new file mode 100644
index 00000000..5dac47eb
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -0,0 +1,67 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_X86_64_H_
+#define _RTE_PREFETCH_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+	asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+	asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+	asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
new file mode 100644
index 00000000..d9356419
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -0,0 +1,73 @@
+#ifndef _RTE_RTM_H_
+#define _RTE_RTM_H_ 1
+
+/*
+ * Copyright (c) 2012,2013 Intel Corporation
+ * Author: Andi Kleen
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that: (1) source code distributions
+ * retain the above copyright notice and this paragraph in its entirety, (2)
+ * distributions including binary code include the above copyright notice and
+ * this paragraph in its entirety in the documentation or other materials
+ * provided with the distribution
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* Official RTM intrinsics interface matching gcc/icc, but works
+   on older gcc compatible compilers and binutils. */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define RTE_XBEGIN_STARTED		(~0u)
+#define RTE_XABORT_EXPLICIT		(1 << 0)
+#define RTE_XABORT_RETRY		(1 << 1)
+#define RTE_XABORT_CONFLICT		(1 << 2)
+#define RTE_XABORT_CAPACITY		(1 << 3)
+#define RTE_XABORT_DEBUG		(1 << 4)
+#define RTE_XABORT_NESTED		(1 << 5)
+#define RTE_XABORT_CODE(x)		(((x) >> 24) & 0xff)
+
+static __attribute__((__always_inline__)) inline
+unsigned int rte_xbegin(void)
+{
+	unsigned int ret = RTE_XBEGIN_STARTED;
+
+	asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
+	return ret;
+}
+
+static __attribute__((__always_inline__)) inline
+void rte_xend(void)
+{
+	 asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
+}
+
+static __attribute__((__always_inline__)) inline
+void rte_xabort(const unsigned int status)
+{
+	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
+}
+
+static __attribute__((__always_inline__)) inline
+int rte_xtest(void)
+{
+	unsigned char out;
+
+	asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" :
+		"=r" (out) :: "memory");
+	return out;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RTM_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rwlock.h b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h
new file mode 100644
index 00000000..afd1c3c2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h
@@ -0,0 +1,82 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RWLOCK_X86_64_H_
+#define _RTE_RWLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+	if (likely(rte_try_tm(&rwl->cnt)))
+		return;
+	rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+	if (unlikely(rwl->cnt))
+		rte_rwlock_read_unlock(rwl);
+	else
+		rte_xend();
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+	if (likely(rte_try_tm(&rwl->cnt)))
+		return;
+	rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+	if (unlikely(rwl->cnt))
+		rte_rwlock_write_unlock(rwl);
+	else
+		rte_xend();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
new file mode 100644
index 00000000..02f95cbb
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -0,0 +1,201 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SPINLOCK_X86_64_H_
+#define _RTE_SPINLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_spinlock.h"
+#include "rte_rtm.h"
+#include "rte_cpuflags.h"
+#include "rte_branch_prediction.h"
+#include "rte_common.h"
+
+#define RTE_RTM_MAX_RETRIES (10)
+#define RTE_XABORT_LOCK_BUSY (0xff)
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	int lock_val = 1;
+	asm volatile (
+			"1:\n"
+			"xchg %[locked], %[lv]\n"
+			"test %[lv], %[lv]\n"
+			"jz 3f\n"
+			"2:\n"
+			"pause\n"
+			"cmpl $0, %[locked]\n"
+			"jnz 2b\n"
+			"jmp 1b\n"
+			"3:\n"
+			: [locked] "=m" (sl->locked), [lv] "=q" (lock_val)
+			: "[lv]" (lock_val)
+			: "memory");
+}
+
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+	int unlock_val = 0;
+	asm volatile (
+			"xchg %[locked], %[ulv]\n"
+			: [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val)
+			: "[ulv]" (unlock_val)
+			: "memory");
+}
+
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+	int lockval = 1;
+
+	asm volatile (
+			"xchg %[locked], %[lockval]"
+			: [locked] "=m" (sl->locked), [lockval] "=q" (lockval)
+			: "[lockval]" (lockval)
+			: "memory");
+
+	return lockval == 0;
+}
+#endif
+
+static uint8_t rtm_supported; /* cache the flag to avoid the overhead
+				 of the rte_cpu_get_flag_enabled function */
+
+static inline void __attribute__((constructor))
+rte_rtm_init(void)
+{
+	rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM);
+}
+
+static inline int rte_tm_supported(void)
+{
+	return rtm_supported;
+}
+
+static inline int
+rte_try_tm(volatile int *lock)
+{
+	if (!rtm_supported)
+		return 0;
+
+	int retries = RTE_RTM_MAX_RETRIES;
+
+	while (likely(retries--)) {
+
+		unsigned int status = rte_xbegin();
+
+		if (likely(RTE_XBEGIN_STARTED == status)) {
+			if (unlikely(*lock))
+				rte_xabort(RTE_XABORT_LOCK_BUSY);
+			else
+				return 1;
+		}
+		while (*lock)
+			rte_pause();
+
+		if ((status & RTE_XABORT_EXPLICIT) &&
+			(RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))
+			continue;
+
+		if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */
+			break;
+	}
+	return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+	if (likely(rte_try_tm(&sl->locked)))
+		return;
+
+	rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+	if (likely(rte_try_tm(&sl->locked)))
+		return 1;
+
+	return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+	if (unlikely(sl->locked))
+		rte_spinlock_unlock(sl);
+	else
+		rte_xend();
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+	if (likely(rte_try_tm(&slr->sl.locked)))
+		return;
+
+	rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+	if (unlikely(slr->sl.locked))
+		rte_spinlock_recursive_unlock(slr);
+	else
+		rte_xend();
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+	if (likely(rte_try_tm(&slr->sl.locked)))
+		return 1;
+
+	return rte_spinlock_recursive_trylock(slr);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
new file mode 100644
index 00000000..b698797c
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -0,0 +1,132 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VECT_H_
+#define _RTE_VECT_H_
+
+/**
+ * @file
+ *
+ * RTE SSE/AVX related header.
+ */
+
+#if (defined(__ICC) || (__GNUC__ == 4 &&  __GNUC_MINOR__ < 4))
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#ifdef __SSE3__
+#include <tmmintrin.h>
+#endif
+
+#if defined(__SSE4_2__) || defined(__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#if defined(__AVX__)
+#include <immintrin.h>
+#endif
+
+#else
+
+#include <x86intrin.h>
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef __m128i xmm_t;
+
+#define	XMM_SIZE	(sizeof(xmm_t))
+#define	XMM_MASK	(XMM_SIZE - 1)
+
+typedef union rte_xmm {
+	xmm_t    x;
+	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
+	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+	double   pd[XMM_SIZE / sizeof(double)];
+} rte_xmm_t;
+
+#ifdef __AVX__
+
+typedef __m256i ymm_t;
+
+#define	YMM_SIZE	(sizeof(ymm_t))
+#define	YMM_MASK	(YMM_SIZE - 1)
+
+typedef union rte_ymm {
+	ymm_t    y;
+	xmm_t    x[YMM_SIZE / sizeof(xmm_t)];
+	uint8_t  u8[YMM_SIZE / sizeof(uint8_t)];
+	uint16_t u16[YMM_SIZE / sizeof(uint16_t)];
+	uint32_t u32[YMM_SIZE / sizeof(uint32_t)];
+	uint64_t u64[YMM_SIZE / sizeof(uint64_t)];
+	double   pd[YMM_SIZE / sizeof(double)];
+} rte_ymm_t;
+
+#endif /* __AVX__ */
+
+#ifdef RTE_ARCH_I686
+#define _mm_cvtsi128_si64(a) ({ \
+	rte_xmm_t m;            \
+	m.x = (a);              \
+	(m.u64[0]);             \
+})
+#endif
+
+/*
+ * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
+ */
+#if (defined(__ICC) && __ICC < 1210)
+#define _mm_set_epi64x(a, b)  ({ \
+	rte_xmm_t m;             \
+	m.u64[0] = b;            \
+	m.u64[1] = a;            \
+	(m.x);                   \
+})
+#endif /* (defined(__ICC) && __ICC < 1210) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
new file mode 100644
index 00000000..bfb4fe44
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -0,0 +1,945 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ATOMIC_H_
+#define _RTE_ATOMIC_H_
+
+/**
+ * @file
+ * Atomic Operations
+ *
+ * This file defines a generic API for atomic operations.
+ */
+
+#include <stdint.h>
+
+#ifdef __DOXYGEN__
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_mb(void);
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_wmb(void);
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+static inline void rte_rmb(void);
+
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the the LOAD and STORE operations that follows it.
+ */
+static inline void rte_smp_mb(void);
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the the STORE operations that follows it.
+ */
+static inline void rte_smp_wmb(void);
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the the LOAD operations that follows it.
+ */
+static inline void rte_smp_rmb(void);
+
+#endif /* __DOXYGEN__ */
+
+/**
+ * Compiler barrier.
+ *
+ * Guarantees that operation reordering does not occur at compile time
+ * for operations directly before and after the barrier.
+ */
+#define	rte_compiler_barrier() do {		\
+	asm volatile ("" : : : "memory");	\
+} while(0)
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ *   if (*dst == exp)
+ *     *dst = src (all 16-bit words)
+ *
+ * @param dst
+ *   The destination location into which the value will be written.
+ * @param exp
+ *   The expected value.
+ * @param src
+ *   The new value.
+ * @return
+ *   Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+	return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+	volatile int16_t cnt; /**< An internal counter value. */
+} rte_atomic16_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC16_INIT(val) { (val) }
+
+/**
+ * Initialize an atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_init(rte_atomic16_t *v)
+{
+	v->cnt = 0;
+}
+
+/**
+ * Atomically read a 16-bit value from a counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   The value of the counter.
+ */
+static inline int16_t
+rte_atomic16_read(const rte_atomic16_t *v)
+{
+	return v->cnt;
+}
+
+/**
+ * Atomically set a counter to a 16-bit value.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param new_value
+ *   The new value for the counter.
+ */
+static inline void
+rte_atomic16_set(rte_atomic16_t *v, int16_t new_value)
+{
+	v->cnt = new_value;
+}
+
+/**
+ * Atomically add a 16-bit value to an atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ */
+static inline void
+rte_atomic16_add(rte_atomic16_t *v, int16_t inc)
+{
+	__sync_fetch_and_add(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 16-bit value from an atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic16_sub(rte_atomic16_t *v, int16_t dec)
+{
+	__sync_fetch_and_sub(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+	rte_atomic16_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+	rte_atomic16_sub(v, 1);
+}
+#endif
+
+/**
+ * Atomically add a 16-bit value to a counter and return the result.
+ *
+ * Atomically adds the 16-bits value (inc) to the atomic counter (v) and
+ * returns the value of v after addition.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ * @return
+ *   The value of v after the addition.
+ */
+static inline int16_t
+rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc)
+{
+	return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 16-bit value from a counter and return
+ * the result.
+ *
+ * Atomically subtracts the 16-bit value (inc) from the atomic counter
+ * (v) and returns the value of v after the subtraction.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ * @return
+ *   The value of v after the subtraction.
+ */
+static inline int16_t
+rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec)
+{
+	return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a 16-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+	return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 16-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+	return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 16-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   0 if failed; else 1, success.
+ */
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+	return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 16-bit counter to 0.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void rte_atomic16_clear(rte_atomic16_t *v)
+{
+	v->cnt = 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ *   if (*dst == exp)
+ *     *dst = src (all 32-bit words)
+ *
+ * @param dst
+ *   The destination location into which the value will be written.
+ * @param exp
+ *   The expected value.
+ * @param src
+ *   The new value.
+ * @return
+ *   Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+	return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+	volatile int32_t cnt; /**< An internal counter value. */
+} rte_atomic32_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC32_INIT(val) { (val) }
+
+/**
+ * Initialize an atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_init(rte_atomic32_t *v)
+{
+	v->cnt = 0;
+}
+
+/**
+ * Atomically read a 32-bit value from a counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   The value of the counter.
+ */
+static inline int32_t
+rte_atomic32_read(const rte_atomic32_t *v)
+{
+	return v->cnt;
+}
+
+/**
+ * Atomically set a counter to a 32-bit value.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param new_value
+ *   The new value for the counter.
+ */
+static inline void
+rte_atomic32_set(rte_atomic32_t *v, int32_t new_value)
+{
+	v->cnt = new_value;
+}
+
+/**
+ * Atomically add a 32-bit value to an atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ */
+static inline void
+rte_atomic32_add(rte_atomic32_t *v, int32_t inc)
+{
+	__sync_fetch_and_add(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 32-bit value from an atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic32_sub(rte_atomic32_t *v, int32_t dec)
+{
+	__sync_fetch_and_sub(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+	rte_atomic32_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+	rte_atomic32_sub(v,1);
+}
+#endif
+
+/**
+ * Atomically add a 32-bit value to a counter and return the result.
+ *
+ * Atomically adds the 32-bits value (inc) to the atomic counter (v) and
+ * returns the value of v after addition.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ * @return
+ *   The value of v after the addition.
+ */
+static inline int32_t
+rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc)
+{
+	return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 32-bit value from a counter and return
+ * the result.
+ *
+ * Atomically subtracts the 32-bit value (inc) from the atomic counter
+ * (v) and returns the value of v after the subtraction.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ * @return
+ *   The value of v after the subtraction.
+ */
+static inline int32_t
+rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec)
+{
+	return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a 32-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+	return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 32-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+	return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 32-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   0 if failed; else 1, success.
+ */
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+	return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 32-bit counter to 0.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void rte_atomic32_clear(rte_atomic32_t *v)
+{
+	v->cnt = 0;
+}
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+/**
+ * An atomic compare and set function used by the mutex functions.
+ * (atomic) equivalent to:
+ *   if (*dst == exp)
+ *     *dst = src (all 64-bit words)
+ *
+ * @param dst
+ *   The destination into which the value will be written.
+ * @param exp
+ *   The expected value.
+ * @param src
+ *   The new value.
+ * @return
+ *   Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+	return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+	volatile int64_t cnt;  /**< Internal counter value. */
+} rte_atomic64_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC64_INIT(val) { (val) }
+
+/**
+ * Initialize the atomic counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_init(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+#ifdef __LP64__
+	v->cnt = 0;
+#else
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, 0);
+	}
+#endif
+}
+#endif
+
+/**
+ * Atomically read a 64-bit counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   The value of the counter.
+ */
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+#ifdef __LP64__
+	return v->cnt;
+#else
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		/* replace the value by itself */
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, tmp);
+	}
+	return tmp;
+#endif
+}
+#endif
+
+/**
+ * Atomically set a 64-bit counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param new_value
+ *   The new value of the counter.
+ */
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+#ifdef __LP64__
+	v->cnt = new_value;
+#else
+	int success = 0;
+	uint64_t tmp;
+
+	while (success == 0) {
+		tmp = v->cnt;
+		success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+		                              tmp, new_value);
+	}
+#endif
+}
+#endif
+
+/**
+ * Atomically add a 64-bit value to a counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ */
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+	__sync_fetch_and_add(&v->cnt, inc);
+}
+#endif
+
+/**
+ * Atomically subtract a 64-bit value from a counter.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+	__sync_fetch_and_sub(&v->cnt, dec);
+}
+#endif
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+	rte_atomic64_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+	rte_atomic64_sub(v, 1);
+}
+#endif
+
+/**
+ * Add a 64-bit value to an atomic counter and return the result.
+ *
+ * Atomically adds the 64-bit value (inc) to the atomic counter (v) and
+ * returns the value of v after the addition.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param inc
+ *   The value to be added to the counter.
+ * @return
+ *   The value of v after the addition.
+ */
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+	return __sync_add_and_fetch(&v->cnt, inc);
+}
+#endif
+
+/**
+ * Subtract a 64-bit value from an atomic counter and return the result.
+ *
+ * Atomically subtracts the 64-bit value (dec) from the atomic counter (v)
+ * and returns the value of v after the subtraction.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @param dec
+ *   The value to be subtracted from the counter.
+ * @return
+ *   The value of v after the subtraction.
+ */
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+	return __sync_sub_and_fetch(&v->cnt, dec);
+}
+#endif
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns
+ * true if the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after the addition is 0; false otherwise.
+ */
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+	return rte_atomic64_add_return(v, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   True if the result after subtraction is 0; false otherwise.
+ */
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+	return rte_atomic64_sub_return(v, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 64-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ * @return
+ *   0 if failed; else 1, success.
+ */
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+	return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ *   A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+	rte_atomic64_set(v, 0);
+}
+#endif
+
+#endif /* _RTE_ATOMIC_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
new file mode 100644
index 00000000..c46fdcf2
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -0,0 +1,217 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_BYTEORDER_H_
+#define _RTE_BYTEORDER_H_
+
+/**
+ * @file
+ *
+ * Byte Swap Operations
+ *
+ * This file defines a generic API for byte swap operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+#include <stdint.h>
+#ifdef RTE_EXEC_ENV_BSDAPP
+#include <sys/endian.h>
+#else
+#include <endian.h>
+#endif
+
+/*
+ * Compile-time endianness detection
+ */
+#define RTE_BIG_ENDIAN    1
+#define RTE_LITTLE_ENDIAN 2
+#if defined __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif /* __BYTE_ORDER__ */
+#elif defined __BYTE_ORDER
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif /* __BYTE_ORDER */
+#elif defined __BIG_ENDIAN__
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif defined __LITTLE_ENDIAN__
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif
+
+/*
+ * An internal function to swap bytes in a 16-bit value.
+ *
+ * It is used by rte_bswap16() when the value is constant. Do not use
+ * this function directly; rte_bswap16() is preferred.
+ */
+static inline uint16_t
+rte_constant_bswap16(uint16_t x)
+{
+	return (uint16_t)(((x & 0x00ffU) << 8) |
+		((x & 0xff00U) >> 8));
+}
+
+/*
+ * An internal function to swap bytes in a 32-bit value.
+ *
+ * It is used by rte_bswap32() when the value is constant. Do not use
+ * this function directly; rte_bswap32() is preferred.
+ */
+static inline uint32_t
+rte_constant_bswap32(uint32_t x)
+{
+	return  ((x & 0x000000ffUL) << 24) |
+		((x & 0x0000ff00UL) << 8) |
+		((x & 0x00ff0000UL) >> 8) |
+		((x & 0xff000000UL) >> 24);
+}
+
+/*
+ * An internal function to swap bytes of a 64-bit value.
+ *
+ * It is used by rte_bswap64() when the value is constant. Do not use
+ * this function directly; rte_bswap64() is preferred.
+ */
+static inline uint64_t
+rte_constant_bswap64(uint64_t x)
+{
+	return  ((x & 0x00000000000000ffULL) << 56) |
+		((x & 0x000000000000ff00ULL) << 40) |
+		((x & 0x0000000000ff0000ULL) << 24) |
+		((x & 0x00000000ff000000ULL) <<  8) |
+		((x & 0x000000ff00000000ULL) >>  8) |
+		((x & 0x0000ff0000000000ULL) >> 24) |
+		((x & 0x00ff000000000000ULL) >> 40) |
+		((x & 0xff00000000000000ULL) >> 56);
+}
+
+
+#ifdef __DOXYGEN__
+
+/**
+ * Swap bytes in a 16-bit value.
+ */
+static uint16_t rte_bswap16(uint16_t _x);
+
+/**
+ * Swap bytes in a 32-bit value.
+ */
+static uint32_t rte_bswap32(uint32_t x);
+
+/**
+ * Swap bytes in a 64-bit value.
+ */
+static uint64_t rte_bswap64(uint64_t x);
+
+/**
+ * Convert a 16-bit value from CPU order to little endian.
+ */
+static uint16_t rte_cpu_to_le_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from CPU order to little endian.
+ */
+static uint32_t rte_cpu_to_le_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from CPU order to little endian.
+ */
+static uint64_t rte_cpu_to_le_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from CPU order to big endian.
+ */
+static uint16_t rte_cpu_to_be_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from CPU order to big endian.
+ */
+static uint32_t rte_cpu_to_be_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from CPU order to big endian.
+ */
+static uint64_t rte_cpu_to_be_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from little endian to CPU order.
+ */
+static uint16_t rte_le_to_cpu_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from little endian to CPU order.
+ */
+static uint32_t rte_le_to_cpu_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from little endian to CPU order.
+ */
+static uint64_t rte_le_to_cpu_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from big endian to CPU order.
+ */
+static uint16_t rte_be_to_cpu_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from big endian to CPU order.
+ */
+static uint32_t rte_be_to_cpu_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from big endian to CPU order.
+ */
+static uint64_t rte_be_to_cpu_64(uint64_t x);
+
+#endif /* __DOXYGEN__ */
+
+#ifdef RTE_FORCE_INTRINSICS
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+#define rte_bswap16(x) __builtin_bswap16(x)
+#endif
+
+#define rte_bswap32(x) __builtin_bswap32(x)
+
+#define rte_bswap64(x) __builtin_bswap64(x)
+
+#endif
+
+#endif /* _RTE_BYTEORDER_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h
new file mode 100644
index 00000000..c1da357c
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h
@@ -0,0 +1,82 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CPUFLAGS_H_
+#define _RTE_CPUFLAGS_H_
+
+/**
+ * @file
+ * Architecture specific API to determine available CPU features at runtime.
+ */
+
+#include <errno.h>
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t;
+
+/**
+ * Get name of CPU flag
+ *
+ * @param feature
+ *     CPU flag ID
+ * @return
+ *     flag name
+ *     NULL if flag ID is invalid
+ */
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
+
+/**
+ * Function for checking a CPU flag availability
+ *
+ * @param feature
+ *     CPU flag to query CPU for
+ * @return
+ *     1 if flag is available
+ *     0 if flag is not available
+ *     -ENOENT if flag is invalid
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
+
+/**
+ * This function checks that the currently used CPU supports the CPU features
+ * that were specified at compile time. It is called automatically within the
+ * EAL, so does not need to be used by applications.
+ */
+void
+rte_cpu_check_supported(void);
+
+#endif /* _RTE_CPUFLAGS_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
new file mode 100644
index 00000000..8cc21f20
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -0,0 +1,205 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*   BSD LICENSE
+ *
+ *   Copyright(c) 2013 6WIND.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CYCLES_H_
+#define _RTE_CYCLES_H_
+
+/**
+ * @file
+ *
+ * Simple Time Reference Functions (Cycles and HPET).
+ */
+
+#include <stdint.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+
+#define MS_PER_S 1000
+#define US_PER_S 1000000
+#define NS_PER_S 1000000000
+
+enum timer_source {
+	EAL_TIMER_TSC = 0,
+	EAL_TIMER_HPET
+};
+extern enum timer_source eal_timer_source;
+
+/**
+ * Get the measured frequency of the RDTSC counter
+ *
+ * @return
+ *   The TSC frequency for this lcore
+ */
+uint64_t
+rte_get_tsc_hz(void);
+
+/**
+ * Return the number of TSC cycles since boot
+ *
+  * @return
+ *   the number of cycles
+ */
+static inline uint64_t
+rte_get_tsc_cycles(void);
+
+#ifdef RTE_LIBEAL_USE_HPET
+/**
+ * Return the number of HPET cycles since boot
+ *
+ * This counter is global for all execution units. The number of
+ * cycles in one second can be retrieved using rte_get_hpet_hz().
+ *
+ * @return
+ *   the number of cycles
+ */
+uint64_t
+rte_get_hpet_cycles(void);
+
+/**
+ * Get the number of HPET cycles in one second.
+ *
+ * @return
+ *   The number of cycles in one second.
+ */
+uint64_t
+rte_get_hpet_hz(void);
+
+/**
+ * Initialise the HPET for use. This must be called before the rte_get_hpet_hz
+ * and rte_get_hpet_cycles APIs are called. If this function does not succeed,
+ * then the HPET functions are unavailable and should not be called.
+ *
+ * @param make_default
+ *	If set, the hpet timer becomes the default timer whose values are
+ *	returned by the rte_get_timer_hz/cycles API calls
+ *
+ * @return
+ *	0 on success,
+ *	-1 on error, and the make_default parameter is ignored.
+ */
+int rte_eal_hpet_init(int make_default);
+
+#endif
+
+/**
+ * Get the number of cycles since boot from the default timer.
+ *
+ * @return
+ *   The number of cycles
+ */
+static inline uint64_t
+rte_get_timer_cycles(void)
+{
+	switch(eal_timer_source) {
+	case EAL_TIMER_TSC:
+		return rte_get_tsc_cycles();
+	case EAL_TIMER_HPET:
+#ifdef RTE_LIBEAL_USE_HPET
+		return rte_get_hpet_cycles();
+#endif
+	default: rte_panic("Invalid timer source specified\n");
+	}
+}
+
+/**
+ * Get the number of cycles in one second for the default timer.
+ *
+ * @return
+ *   The number of cycles in one second.
+ */
+static inline uint64_t
+rte_get_timer_hz(void)
+{
+	switch(eal_timer_source) {
+	case EAL_TIMER_TSC:
+		return rte_get_tsc_hz();
+	case EAL_TIMER_HPET:
+#ifdef RTE_LIBEAL_USE_HPET
+		return rte_get_hpet_hz();
+#endif
+	default: rte_panic("Invalid timer source specified\n");
+	}
+}
+
+/**
+ * Wait at least us microseconds.
+ *
+ * @param us
+ *   The number of microseconds to wait.
+ */
+void
+rte_delay_us(unsigned us);
+
+/**
+ * Wait at least ms milliseconds.
+ *
+ * @param ms
+ *   The number of milliseconds to wait.
+ */
+static inline void
+rte_delay_ms(unsigned ms)
+{
+	rte_delay_us(ms * 1000);
+}
+
+#endif /* _RTE_CYCLES_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h
new file mode 100644
index 00000000..03e84773
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcpy.h
@@ -0,0 +1,144 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCPY_H_
+#define _RTE_MEMCPY_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcpy().
+ */
+
+/**
+ * Copy 16 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 32 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 48 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 64 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 128 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 256 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so it's address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ * @param n
+ *   Number of bytes to copy.
+ * @return
+ *   Pointer to the destination data.
+ */
+static void *
+rte_memcpy(void *dst, const void *src, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcpy() function used by rte_memcpy macro
+ */
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+
+#endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h
new file mode 100644
index 00000000..07e409ec
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_prefetch.h
@@ -0,0 +1,83 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PREFETCH_H_
+#define _RTE_PREFETCH_H_
+
+/**
+ * @file
+ *
+ * Prefetch operations.
+ *
+ * This file defines an API for prefetch macros / inline-functions,
+ * which are architecture-dependent. Prefetching occurs when a
+ * processor requests an instruction or data from memory to cache
+ * before it is actually needed, potentially speeding up the execution of the
+ * program.
+ */
+
+/**
+ * Prefetch a cache line into all cache levels.
+ * @param p
+ *   Address to prefetch
+ */
+static inline void rte_prefetch0(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels except the 0th cache level.
+ * @param p
+ *   Address to prefetch
+ */
+static inline void rte_prefetch1(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels except the 0th and 1th cache
+ * levels.
+ * @param p
+ *   Address to prefetch
+ */
+static inline void rte_prefetch2(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels (non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ *   Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
+#endif /* _RTE_PREFETCH_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h
new file mode 100644
index 00000000..7a0fdc55
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_rwlock.h
@@ -0,0 +1,208 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RWLOCK_H_
+#define _RTE_RWLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Read-Write Locks
+ *
+ * This file defines an API for read-write locks. The lock is used to
+ * protect data that allows multiple readers in parallel, but only
+ * one writer. All readers are blocked until the writer is finished
+ * writing.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+/**
+ * The rte_rwlock_t type.
+ *
+ * cnt is -1 when write lock is held, and > 0 when read locks are held.
+ */
+typedef struct {
+	volatile int32_t cnt; /**< -1 when W lock held, > 0 when R locks held. */
+} rte_rwlock_t;
+
+/**
+ * A static rwlock initializer.
+ */
+#define RTE_RWLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the rwlock to an unlocked state.
+ *
+ * @param rwl
+ *   A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_init(rte_rwlock_t *rwl)
+{
+	rwl->cnt = 0;
+}
+
+/**
+ * Take a read lock. Loop until the lock is held.
+ *
+ * @param rwl
+ *   A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_read_lock(rte_rwlock_t *rwl)
+{
+	int32_t x;
+	int success = 0;
+
+	while (success == 0) {
+		x = rwl->cnt;
+		/* write lock is held */
+		if (x < 0) {
+			rte_pause();
+			continue;
+		}
+		success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
+					      x, x + 1);
+	}
+}
+
+/**
+ * Release a read lock.
+ *
+ * @param rwl
+ *   A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_read_unlock(rte_rwlock_t *rwl)
+{
+	rte_atomic32_dec((rte_atomic32_t *)(intptr_t)&rwl->cnt);
+}
+
+/**
+ * Take a write lock. Loop until the lock is held.
+ *
+ * @param rwl
+ *   A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_lock(rte_rwlock_t *rwl)
+{
+	int32_t x;
+	int success = 0;
+
+	while (success == 0) {
+		x = rwl->cnt;
+		/* a lock is held */
+		if (x != 0) {
+			rte_pause();
+			continue;
+		}
+		success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
+					      0, -1);
+	}
+}
+
+/**
+ * Release a write lock.
+ *
+ * @param rwl
+ *   A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_unlock(rte_rwlock_t *rwl)
+{
+	rte_atomic32_inc((rte_atomic32_t *)(intptr_t)&rwl->cnt);
+}
+
+/**
+ * Try to execute critical section in a hardware memory transaction, if it
+ * fails or not available take a read lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param rwl
+ *   A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back
+ *
+ * @param rwl
+ *   A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Try to execute critical section in a hardware memory transaction, if it
+ * fails or not available take a write lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param rwl
+ *   A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Commit hardware memory transaction or release the write lock if the lock is used as a fall-back
+ *
+ * @param rwl
+ *   A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h
new file mode 100644
index 00000000..e51fc56b
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_spinlock.h
@@ -0,0 +1,325 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SPINLOCK_H_
+#define _RTE_SPINLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Spinlocks
+ *
+ * This file defines an API for read-write locks, which are implemented
+ * in an architecture-specific way. This kind of lock simply waits in
+ * a loop repeatedly checking until the lock becomes available.
+ *
+ * All locks must be initialised before use, and only initialised once.
+ *
+ */
+
+#include <rte_lcore.h>
+#ifdef RTE_FORCE_INTRINSICS
+#include <rte_common.h>
+#endif
+
+/**
+ * The rte_spinlock_t type.
+ */
+typedef struct {
+	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
+} rte_spinlock_t;
+
+/**
+ * A static spinlock initializer.
+ */
+#define RTE_SPINLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the spinlock to an unlocked state.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_init(rte_spinlock_t *sl)
+{
+	sl->locked = 0;
+}
+
+/**
+ * Take the spinlock.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+	while (__sync_lock_test_and_set(&sl->locked, 1))
+		while(sl->locked)
+			rte_pause();
+}
+#endif
+
+/**
+ * Release the spinlock.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+	__sync_lock_release(&sl->locked);
+}
+#endif
+
+/**
+ * Try to take the lock.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ * @return
+ *   1 if the lock is successfully taken; 0 otherwise.
+ */
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+	return __sync_lock_test_and_set(&sl->locked,1) == 0;
+}
+#endif
+
+/**
+ * Test if the lock is taken.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ * @return
+ *   1 if the lock is currently taken; 0 otherwise.
+ */
+static inline int rte_spinlock_is_locked (rte_spinlock_t *sl)
+{
+	return sl->locked;
+}
+
+/**
+ * Test if hardware transactional memory (lock elision) is supported
+ *
+ * @return
+ *   1 if the hardware transactional memory is supported; 0 otherwise.
+ */
+static inline int rte_tm_supported(void);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available take the spinlock.
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl);
+
+/**
+ * Commit hardware memory transaction or release the spinlock if
+ * the spinlock is used as a fall-back
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available try to take the lock.
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param sl
+ *   A pointer to the spinlock.
+ * @return
+ *   1 if the hardware memory transaction is successfully started
+ *   or lock is successfully taken; 0 otherwise.
+ */
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl);
+
+/**
+ * The rte_spinlock_recursive_t type.
+ */
+typedef struct {
+	rte_spinlock_t sl; /**< the actual spinlock */
+	volatile int user; /**< core id using lock, -1 for unused */
+	volatile int count; /**< count of time this lock has been called */
+} rte_spinlock_recursive_t;
+
+/**
+ * A static recursive spinlock initializer.
+ */
+#define RTE_SPINLOCK_RECURSIVE_INITIALIZER {RTE_SPINLOCK_INITIALIZER, -1, 0}
+
+/**
+ * Initialize the recursive spinlock to an unlocked state.
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_init(rte_spinlock_recursive_t *slr)
+{
+	rte_spinlock_init(&slr->sl);
+	slr->user = -1;
+	slr->count = 0;
+}
+
+/**
+ * Take the recursive spinlock.
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr)
+{
+	int id = rte_gettid();
+
+	if (slr->user != id) {
+		rte_spinlock_lock(&slr->sl);
+		slr->user = id;
+	}
+	slr->count++;
+}
+/**
+ * Release the recursive spinlock.
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_unlock(rte_spinlock_recursive_t *slr)
+{
+	if (--(slr->count) == 0) {
+		slr->user = -1;
+		rte_spinlock_unlock(&slr->sl);
+	}
+
+}
+
+/**
+ * Try to take the recursive lock.
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ * @return
+ *   1 if the lock is successfully taken; 0 otherwise.
+ */
+static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr)
+{
+	int id = rte_gettid();
+
+	if (slr->user != id) {
+		if (rte_spinlock_trylock(&slr->sl) == 0)
+			return 0;
+		slr->user = id;
+	}
+	slr->count++;
+	return 1;
+}
+
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available take the recursive spinlocks
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_lock_tm(
+	rte_spinlock_recursive_t *slr);
+
+/**
+ * Commit hardware memory transaction or release the recursive spinlock
+ * if the recursive spinlock is used as a fall-back
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_unlock_tm(
+	rte_spinlock_recursive_t *slr);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available try to take the recursive lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param slr
+ *   A pointer to the recursive spinlock.
+ * @return
+ *   1 if the hardware memory transaction is successfully started
+ *   or lock is successfully taken; 0 otherwise.
+ */
+static inline int rte_spinlock_recursive_trylock_tm(
+	rte_spinlock_recursive_t *slr);
+
+#endif /* _RTE_SPINLOCK_H_ */
diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
new file mode 100644
index 00000000..4012cd67
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_alarm.h
@@ -0,0 +1,106 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ALARM_H_
+#define _RTE_ALARM_H_
+
+/**
+ * @file
+ *
+ * Alarm functions
+ *
+ * Simple alarm-clock functionality supplied by eal.
+ * Does not require hpet support.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/**
+ * Signature of callback back function called when an alarm goes off.
+ */
+typedef void (*rte_eal_alarm_callback)(void *arg);
+
+/**
+ * Function to set a callback to be triggered when us microseconds
+ * have expired. Accuracy of timing to the microsecond is not guaranteed. The
+ * alarm function will not be called *before* the requested time, but may
+ * be called a short period of time afterwards.
+ * The alarm handler will be called only once. There is no need to call
+ * "rte_eal_alarm_cancel" from within the callback function.
+ *
+ * @param us
+ *   The time in microseconds before the callback is called
+ * @param cb
+ *   The function to be called when the alarm expires
+ * @param cb_arg
+ *   Pointer parameter to be passed to the callback function
+ *
+ * @return
+ *   On success, zero.
+ *   On failure, a negative error number
+ */
+int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
+
+/**
+ * Function to cancel an alarm callback which has been registered before. If
+ * used outside alarm callback it wait for all callbacks to finish execution.
+ *
+ * @param cb_fn
+ *  alarm callback
+ * @param cb_arg
+ *  Pointer parameter to be passed to the callback function. To remove all
+ *  copies of a given callback function, irrespective of parameter, (void *)-1
+ *  can be used here.
+ *
+ * @return
+ *    - value greater than 0 and rte_errno not changed - returned value is
+ *      the number of canceled alarm callback functions
+ *    - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one
+ *      alarm could not be canceled because cancellation was requested from alarm
+ *      callback context. Returned value is the number of succesfuly canceled
+ *      alarm callbacks
+ *    -  0 and rte_errno set to ENOENT - no alarm found
+ *    - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback)
+ */
+int rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_ALARM_H_ */
diff --git a/lib/librte_eal/common/include/rte_branch_prediction.h b/lib/librte_eal/common/include/rte_branch_prediction.h
new file mode 100644
index 00000000..a6a56d17
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_branch_prediction.h
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Branch Prediction Helpers in RTE
+ */
+
+#ifndef _RTE_BRANCH_PREDICTION_H_
+#define _RTE_BRANCH_PREDICTION_H_
+
+/**
+ * Check if a branch is likely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * likely to be taken. Example:
+ *
+ *   if (likely(x > 1))
+ *      do_stuff();
+ *
+ */
+#ifndef likely
+#define likely(x)  __builtin_expect((x),1)
+#endif /* likely */
+
+/**
+ * Check if a branch is unlikely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * unlikely to be taken. Example:
+ *
+ *   if (unlikely(x < 1))
+ *      do_stuff();
+ *
+ */
+#ifndef unlikely
+#define unlikely(x)  __builtin_expect((x),0)
+#endif /* unlikely */
+
+#endif /* _RTE_BRANCH_PREDICTION_H_ */
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
new file mode 100644
index 00000000..332f2a43
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -0,0 +1,401 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_COMMON_H_
+#define _RTE_COMMON_H_
+
+/**
+ * @file
+ *
+ * Generic, commonly-used macro and inline function definitions
+ * for DPDK.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+
+#ifndef typeof
+#define typeof __typeof__
+#endif
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+#ifdef RTE_ARCH_STRICT_ALIGN
+typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1)));
+typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1)));
+typedef uint16_t unaligned_uint16_t __attribute__ ((aligned(1)));
+#else
+typedef uint64_t unaligned_uint64_t;
+typedef uint32_t unaligned_uint32_t;
+typedef uint16_t unaligned_uint16_t;
+#endif
+
+/**
+ * Force alignment
+ */
+#define __rte_aligned(a) __attribute__((__aligned__(a)))
+
+/**
+ * Force a structure to be packed
+ */
+#define __rte_packed __attribute__((__packed__))
+
+/******* Macro to mark functions and fields scheduled for removal *****/
+#define __rte_deprecated	__attribute__((__deprecated__))
+
+/*********** Macros to eliminate unused variable warnings ********/
+
+/**
+ * short definition to mark a function parameter unused
+ */
+#define __rte_unused __attribute__((__unused__))
+
+/**
+ * definition to mark a variable or function parameter as used so
+ * as to avoid a compiler warning
+ */
+#define RTE_SET_USED(x) (void)(x)
+
+/*********** Macros for pointer arithmetic ********/
+
+/**
+ * add a byte-value offset from a pointer
+ */
+#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
+
+/**
+ * subtract a byte-value offset from a pointer
+ */
+#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)ptr - (x)))
+
+/**
+ * get the difference between two pointer values, i.e. how far apart
+ * in bytes are the locations they point two. It is assumed that
+ * ptr1 is greater than ptr2.
+ */
+#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
+
+/*********** Macros/static functions for doing alignment ********/
+
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no higher than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_FLOOR(ptr, align) \
+	((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)ptr, align))
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no
+ * bigger than the first parameter. Second parameter must be a
+ * power-of-two value.
+ */
+#define RTE_ALIGN_FLOOR(val, align) \
+	(typeof(val))((val) & (~((typeof(val))((align) - 1))))
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_CEIL(ptr, align) \
+	RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no lower
+ * than the first parameter. Second parameter must be a power-of-two
+ * value.
+ */
+#define RTE_ALIGN_CEIL(val, align) \
+	RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align)
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_PTR_ALIGN_CEIL
+ */
+#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant
+ * value will be of the same type as the first parameter, and
+ * will be no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_ALIGN_CEIL
+ */
+#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align)
+
+/**
+ * Checks if a pointer is aligned to a given power-of-two value
+ *
+ * @param ptr
+ *   The pointer whose alignment is to be checked
+ * @param align
+ *   The power-of-two value to which the ptr should be aligned
+ *
+ * @return
+ *   True(1) where the pointer is correctly aligned, false(0) otherwise
+ */
+static inline int
+rte_is_aligned(void *ptr, unsigned align)
+{
+	return RTE_PTR_ALIGN(ptr, align) == ptr;
+}
+
+/*********** Macros for compile type checks ********/
+
+/**
+ * Triggers an error at compilation time if the condition is true.
+ */
+#ifndef __OPTIMIZE__
+#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+extern int RTE_BUILD_BUG_ON_detected_error;
+#define RTE_BUILD_BUG_ON(condition) do {             \
+	((void)sizeof(char[1 - 2*!!(condition)]));   \
+	if (condition)                               \
+		RTE_BUILD_BUG_ON_detected_error = 1; \
+} while(0)
+#endif
+
+/*********** Macros to work with powers of 2 ********/
+
+/**
+ * Returns true if n is a power of 2
+ * @param n
+ *     Number to check
+ * @return 1 if true, 0 otherwise
+ */
+static inline int
+rte_is_power_of_2(uint32_t n)
+{
+	return n && !(n & (n - 1));
+}
+
+/**
+ * Aligns input parameter to the next power of 2
+ *
+ * @param x
+ *   The integer value to algin
+ *
+ * @return
+ *   Input parameter aligned to the next power of 2
+ */
+static inline uint32_t
+rte_align32pow2(uint32_t x)
+{
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+
+	return x + 1;
+}
+
+/**
+ * Aligns 64b input parameter to the next power of 2
+ *
+ * @param v
+ *   The 64b value to align
+ *
+ * @return
+ *   Input parameter aligned to the next power of 2
+ */
+static inline uint64_t
+rte_align64pow2(uint64_t v)
+{
+	v--;
+	v |= v >> 1;
+	v |= v >> 2;
+	v |= v >> 4;
+	v |= v >> 8;
+	v |= v >> 16;
+	v |= v >> 32;
+
+	return v + 1;
+}
+
+/*********** Macros for calculating min and max **********/
+
+/**
+ * Macro to return the minimum of two numbers
+ */
+#define RTE_MIN(a, b) ({ \
+		typeof (a) _a = (a); \
+		typeof (b) _b = (b); \
+		_a < _b ? _a : _b; \
+	})
+
+/**
+ * Macro to return the maximum of two numbers
+ */
+#define RTE_MAX(a, b) ({ \
+		typeof (a) _a = (a); \
+		typeof (b) _b = (b); \
+		_a > _b ? _a : _b; \
+	})
+
+/*********** Other general functions / macros ********/
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+/**
+ * PAUSE instruction for tight loops (avoid busy waiting)
+ */
+static inline void
+rte_pause (void)
+{
+	_mm_pause();
+}
+#else
+static inline void
+rte_pause(void) {}
+#endif
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the return
+ * value is undefined.
+ * @param v
+ *     input parameter, should not be zero.
+ * @return
+ *     least significant set bit in the input parameter.
+ */
+static inline uint32_t
+rte_bsf32(uint32_t v)
+{
+	return __builtin_ctz(v);
+}
+
+#ifndef offsetof
+/** Return the offset of a field in a structure. */
+#define offsetof(TYPE, MEMBER)  __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+#define _RTE_STR(x) #x
+/** Take a macro value and get a string version of it */
+#define RTE_STR(x) _RTE_STR(x)
+
+/** Mask value of type "tp" for the first "ln" bit set. */
+#define	RTE_LEN2MASK(ln, tp)	\
+	((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
+
+/** Number of elements in the array. */
+#define	RTE_DIM(a)	(sizeof (a) / sizeof ((a)[0]))
+
+/**
+ * Converts a numeric string to the equivalent uint64_t value.
+ * As well as straight number conversion, also recognises the suffixes
+ * k, m and g for kilobytes, megabytes and gigabytes respectively.
+ *
+ * If a negative number is passed in  i.e. a string with the first non-black
+ * character being "-", zero is returned. Zero is also returned in the case of
+ * an error with the strtoull call in the function.
+ *
+ * @param str
+ *     String containing number to convert.
+ * @return
+ *     Number.
+ */
+static inline uint64_t
+rte_str_to_size(const char *str)
+{
+	char *endptr;
+	unsigned long long size;
+
+	while (isspace((int)*str))
+		str++;
+	if (*str == '-')
+		return 0;
+
+	errno = 0;
+	size = strtoull(str, &endptr, 0);
+	if (errno)
+		return 0;
+
+	if (*endptr == ' ')
+		endptr++; /* allow 1 space gap */
+
+	switch (*endptr){
+	case 'G': case 'g': size *= 1024; /* fall-through */
+	case 'M': case 'm': size *= 1024; /* fall-through */
+	case 'K': case 'k': size *= 1024; /* fall-through */
+	default:
+		break;
+	}
+	return size;
+}
+
+/**
+ * Function to terminate the application immediately, printing an error
+ * message and returning the exit_code back to the shell.
+ *
+ * This function never returns
+ *
+ * @param exit_code
+ *     The exit code to be returned by the application
+ * @param format
+ *     The format string to be used for printing the message. This can include
+ *     printf format characters which will be expanded using any further parameters
+ *     to the function.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+	__attribute__((noreturn))
+	__attribute__((format(printf, 2, 3)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
new file mode 100644
index 00000000..94129fab
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -0,0 +1,103 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEBUG_H_
+#define _RTE_DEBUG_H_
+
+/**
+ * @file
+ *
+ * Debug Functions in RTE
+ *
+ * This file defines a generic API for debug operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Dump the stack of the calling core to the console.
+ */
+void rte_dump_stack(void);
+
+/**
+ * Dump the registers of the calling core to the console.
+ *
+ * Note: Not implemented in a userapp environment; use gdb instead.
+ */
+void rte_dump_registers(void);
+
+/**
+ * Provide notification of a critical non-recoverable error and terminate
+ * execution abnormally.
+ *
+ * Display the format string and its expanded arguments (printf-like).
+ *
+ * In a linuxapp environment, this function dumps the stack and calls
+ * abort() resulting in a core dump if enabled.
+ *
+ * The function never returns.
+ *
+ * @param ...
+ *   The format string, followed by the variable list of arguments.
+ */
+#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
+#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
+
+#define	RTE_VERIFY(exp)	do {                                                  \
+	if (!(exp))                                                           \
+		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
+
+/*
+ * Provide notification of a critical non-recoverable error and stop.
+ *
+ * This function should not be called directly. Refer to rte_panic() macro
+ * documentation.
+ */
+void __rte_panic(const char *funcname , const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+	__attribute__((cold))
+#endif
+#endif
+	__attribute__((noreturn))
+	__attribute__((format(printf, 2, 3)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEBUG_H_ */
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
new file mode 100644
index 00000000..f1b55079
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -0,0 +1,192 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEV_H_
+#define _RTE_DEV_H_
+
+/**
+ * @file
+ *
+ * RTE PMD Driver Registration Interface
+ *
+ * This file manages the list of device drivers.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+
+__attribute__((format(printf, 2, 0)))
+static inline void
+rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	char buffer[vsnprintf(NULL, 0, fmt, ap) + 1];
+
+	va_end(ap);
+
+	va_start(ap, fmt);
+	vsnprintf(buffer, sizeof(buffer), fmt, ap);
+	va_end(ap);
+
+	rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer);
+}
+
+/* Macros for checking for restricting functions to primary instance only */
+#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \
+		RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \
+		return retval; \
+	} \
+} while (0)
+
+#define RTE_PROC_PRIMARY_OR_RET() do { \
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \
+		RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \
+		return; \
+	} \
+} while (0)
+
+/* Macros to check for invalid function pointers */
+#define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \
+	if ((func) == NULL) { \
+		RTE_PMD_DEBUG_TRACE("Function not supported\n"); \
+		return retval; \
+	} \
+} while (0)
+
+#define RTE_FUNC_PTR_OR_RET(func) do { \
+	if ((func) == NULL) { \
+		RTE_PMD_DEBUG_TRACE("Function not supported\n"); \
+		return; \
+	} \
+} while (0)
+
+
+/** Double linked list of device drivers. */
+TAILQ_HEAD(rte_driver_list, rte_driver);
+
+/**
+ * Initialization function called for each device driver once.
+ */
+typedef int (rte_dev_init_t)(const char *name, const char *args);
+
+/**
+ * Uninitilization function called for each device driver once.
+ */
+typedef int (rte_dev_uninit_t)(const char *name);
+
+/**
+ * Driver type enumeration
+ */
+enum pmd_type {
+	PMD_VDEV = 0,
+	PMD_PDEV = 1,
+};
+
+/**
+ * A structure describing a device driver.
+ */
+struct rte_driver {
+	TAILQ_ENTRY(rte_driver) next;  /**< Next in list. */
+	enum pmd_type type;		   /**< PMD Driver type */
+	const char *name;                   /**< Driver name. */
+	rte_dev_init_t *init;              /**< Device init. function. */
+	rte_dev_uninit_t *uninit;          /**< Device uninit. function. */
+};
+
+/**
+ * Register a device driver.
+ *
+ * @param driver
+ *   A pointer to a rte_dev structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_driver_register(struct rte_driver *driver);
+
+/**
+ * Unregister a device driver.
+ *
+ * @param driver
+ *   A pointer to a rte_dev structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_driver_unregister(struct rte_driver *driver);
+
+/**
+ * Initalize all the registered drivers in this process
+ */
+int rte_eal_dev_init(void);
+
+/**
+ * Initialize a driver specified by name.
+ *
+ * @param name
+ *   The pointer to a driver name to be initialized.
+ * @param args
+ *   The pointer to arguments used by driver initialization.
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_vdev_init(const char *name, const char *args);
+
+/**
+ * Uninitalize a driver specified by name.
+ *
+ * @param name
+ *   The pointer to a driver name to be initialized.
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_vdev_uninit(const char *name);
+
+#define PMD_REGISTER_DRIVER(d)\
+void devinitfn_ ##d(void);\
+void __attribute__((constructor, used)) devinitfn_ ##d(void)\
+{\
+	rte_eal_driver_register(&d);\
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VDEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
new file mode 100644
index 00000000..53c59f56
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -0,0 +1,177 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2014 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEVARGS_H_
+#define _RTE_DEVARGS_H_
+
+/**
+ * @file
+ *
+ * RTE devargs: list of devices and their user arguments
+ *
+ * This file stores a list of devices and their arguments given by
+ * the user when a DPDK application is started. These devices can be PCI
+ * devices or virtual devices. These devices are stored at startup in a
+ * list of rte_devargs structures.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+#include <rte_pci.h>
+
+/**
+ * Type of generic device
+ */
+enum rte_devtype {
+	RTE_DEVTYPE_WHITELISTED_PCI,
+	RTE_DEVTYPE_BLACKLISTED_PCI,
+	RTE_DEVTYPE_VIRTUAL,
+};
+
+/**
+ * Structure that stores a device given by the user with its arguments
+ *
+ * A user device is a physical or a virtual device given by the user to
+ * the DPDK application at startup through command line arguments.
+ *
+ * The structure stores the configuration of the device, its PCI
+ * identifier if it's a PCI device or the driver name if it's a virtual
+ * device.
+ */
+struct rte_devargs {
+	/** Next in list. */
+	TAILQ_ENTRY(rte_devargs) next;
+	/** Type of device. */
+	enum rte_devtype type;
+	union {
+		/** Used if type is RTE_DEVTYPE_*_PCI. */
+		struct {
+			/** PCI location. */
+			struct rte_pci_addr addr;
+		} pci;
+		/** Used if type is RTE_DEVTYPE_VIRTUAL. */
+		struct {
+			/** Driver name. */
+			char drv_name[32];
+		} virt;
+	};
+	/** Arguments string as given by user or "" for no argument. */
+	char *args;
+};
+
+/** user device double-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
+/** Global list of user devices */
+extern struct rte_devargs_list devargs_list;
+
+/**
+ * Parse a devargs string.
+ *
+ * For PCI devices, the format of arguments string is "PCI_ADDR" or
+ * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
+ * "04:00.0,arg=val".
+ *
+ * For virtual devices, the format of arguments string is "DRIVER_NAME*"
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
+ * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1".
+ *
+ * The function parses the arguments string to get driver name and driver
+ * arguments.
+ *
+ * @param devargs_str
+ *   The arguments as given by the user.
+ * @param drvname
+ *   The pointer to the string to store parsed driver name.
+ * @param drvargs
+ *   The pointer to the string to store parsed driver arguments.
+ *
+ * @return
+ *   - 0 on success
+ *   - A negative value on error
+ */
+int rte_eal_parse_devargs_str(const char *devargs_str,
+				char **drvname, char **drvargs);
+
+/**
+ * Add a device to the user device list
+ *
+ * For PCI devices, the format of arguments string is "PCI_ADDR" or
+ * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
+ * "04:00.0,arg=val".
+ *
+ * For virtual devices, the format of arguments string is "DRIVER_NAME*"
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
+ * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the
+ * driver name is not checked by this function, it is done when probing
+ * the drivers.
+ *
+ * @param devtype
+ *   The type of the device.
+ * @param devargs_str
+ *   The arguments as given by the user.
+ *
+ * @return
+ *   - 0 on success
+ *   - A negative value on error
+ */
+int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
+
+/**
+ * Count the number of user devices of a specified type
+ *
+ * @param devtype
+ *   The type of the devices to counted.
+ *
+ * @return
+ *   The number of devices.
+ */
+unsigned int
+rte_eal_devargs_type_count(enum rte_devtype devtype);
+
+/**
+ * This function dumps the list of user device and their arguments.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_eal_devargs_dump(FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEVARGS_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
new file mode 100644
index 00000000..a71d6f57
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -0,0 +1,259 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EAL_H_
+#define _RTE_EAL_H_
+
+/**
+ * @file
+ *
+ * EAL Configuration API
+ */
+
+#include <stdint.h>
+#include <sched.h>
+
+#include <rte_per_lcore.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */
+
+/* Maximum thread_name length. */
+#define RTE_MAX_THREAD_NAME_LEN 16
+
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+	ROLE_RTE,
+	ROLE_OFF,
+};
+
+/**
+ * The type of process in a linuxapp, multi-process setup
+ */
+enum rte_proc_type_t {
+	RTE_PROC_AUTO = -1,   /* allow auto-detection of primary/secondary */
+	RTE_PROC_PRIMARY = 0, /* set to zero, so primary is the default */
+	RTE_PROC_SECONDARY,
+
+	RTE_PROC_INVALID
+};
+
+/**
+ * The global RTE configuration structure.
+ */
+struct rte_config {
+	uint32_t master_lcore;       /**< Id of the master lcore */
+	uint32_t lcore_count;        /**< Number of available logical cores. */
+	enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
+
+	/** Primary or secondary configuration */
+	enum rte_proc_type_t process_type;
+
+	/**
+	 * Pointer to memory configuration, which may be shared across multiple
+	 * DPDK instances
+	 */
+	struct rte_mem_config *mem_config;
+} __attribute__((__packed__));
+
+/**
+ * Get the global configuration structure.
+ *
+ * @return
+ *   A pointer to the global configuration structure.
+ */
+struct rte_config *rte_eal_get_configuration(void);
+
+/**
+ * Get a lcore's role.
+ *
+ * @param lcore_id
+ *   The identifier of the lcore.
+ * @return
+ *   The role of the lcore.
+ */
+enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id);
+
+
+/**
+ * Get the process type in a multi-process setup
+ *
+ * @return
+ *   The process type
+ */
+enum rte_proc_type_t rte_eal_process_type(void);
+
+/**
+ * Request iopl privilege for all RPL.
+ *
+ * This function should be called by pmds which need access to ioports.
+
+ * @return
+ *   - On success, returns 0.
+ *   - On failure, returns -1.
+ */
+int rte_eal_iopl_init(void);
+
+/**
+ * Initialize the Environment Abstraction Layer (EAL).
+ *
+ * This function is to be executed on the MASTER lcore only, as soon
+ * as possible in the application's main() function.
+ *
+ * The function finishes the initialization process before main() is called.
+ * It puts the SLAVE lcores in the WAIT state.
+ *
+ * When the multi-partition feature is supported, depending on the
+ * configuration (if CONFIG_RTE_EAL_MAIN_PARTITION is disabled), this
+ * function waits to ensure that the magic number is set before
+ * returning. See also the rte_eal_get_configuration() function. Note:
+ * This behavior may change in the future.
+ *
+ * @param argc
+ *   The argc argument that was given to the main() function.
+ * @param argv
+ *   The argv argument that was given to the main() function.
+ * @return
+ *   - On success, the number of parsed arguments, which is greater or
+ *     equal to zero. After the call to rte_eal_init(),
+ *     all arguments argv[x] with x < ret may be modified and should
+ *     not be accessed by the application.
+ *   - On failure, a negative error value.
+ */
+int rte_eal_init(int argc, char **argv);
+
+/**
+ * Check if a primary process is currently alive
+ *
+ * This function returns true when a primary process is currently
+ * active.
+ *
+ * @param config_file_path
+ *   The config_file_path argument provided should point at the location
+ *   that the primary process will create its config file. If NULL, the default
+ *   config file path is used.
+ *
+ * @return
+ *  - If alive, returns 1.
+ *  - If dead, returns 0.
+ */
+int rte_eal_primary_proc_alive(const char *config_file_path);
+
+/**
+ * Usage function typedef used by the application usage function.
+ *
+ * Use this function typedef to define and call rte_set_applcation_usage_hook()
+ * routine.
+ */
+typedef void	(*rte_usage_hook_t)(const char * prgname);
+
+/**
+ * Add application usage routine callout from the eal_usage() routine.
+ *
+ * This function allows the application to include its usage message
+ * in the EAL system usage message. The routine rte_set_application_usage_hook()
+ * needs to be called before the rte_eal_init() routine in the application.
+ *
+ * This routine is optional for the application and will behave as if the set
+ * routine was never called as the default behavior.
+ *
+ * @param usage_func
+ *   The func argument is a function pointer to the application usage routine.
+ *   Called function is defined using rte_usage_hook_t typedef, which is of
+ *   the form void rte_usage_func(const char * prgname).
+ *
+ *   Calling this routine with a NULL value will reset the usage hook routine and
+ *   return the current value, which could be NULL.
+ * @return
+ *   - Returns the current value of the rte_application_usage pointer to allow
+ *     the caller to daisy chain the usage routines if needing more then one.
+ */
+rte_usage_hook_t
+rte_set_application_usage_hook(rte_usage_hook_t usage_func);
+
+/**
+ * macro to get the lock of tailq in mem_config
+ */
+#define RTE_EAL_TAILQ_RWLOCK         (&rte_eal_get_configuration()->mem_config->qlock)
+
+/**
+ * macro to get the multiple lock of mempool shared by mutiple-instance
+ */
+#define RTE_EAL_MEMPOOL_RWLOCK            (&rte_eal_get_configuration()->mem_config->mplock)
+
+/**
+ * Whether EAL is using huge pages (disabled by --no-huge option).
+ * The no-huge mode cannot be used with UIO poll-mode drivers like igb/ixgbe.
+ * It is useful for NIC drivers (e.g. librte_pmd_mlx4, librte_pmd_vmxnet3) or
+ * crypto drivers (e.g. librte_crypto_nitrox) provided by third-parties such
+ * as 6WIND.
+ *
+ * @return
+ *   Nonzero if hugepages are enabled.
+ */
+int rte_eal_has_hugepages(void);
+
+/**
+ * A wrap API for syscall gettid.
+ *
+ * @return
+ *   On success, returns the thread ID of calling process.
+ *   It is always successful.
+ */
+int rte_sys_gettid(void);
+
+/**
+ * Get system unique thread id.
+ *
+ * @return
+ *   On success, returns the thread ID of calling process.
+ *   It is always successful.
+ */
+static inline int rte_gettid(void)
+{
+	static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
+	if (RTE_PER_LCORE(_thread_id) == -1)
+		RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
+	return RTE_PER_LCORE(_thread_id);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EAL_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
new file mode 100644
index 00000000..2b5e0b17
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -0,0 +1,100 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EAL_MEMCONFIG_H_
+#define _RTE_EAL_MEMCONFIG_H_
+
+#include <rte_tailq.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_malloc_heap.h>
+#include <rte_rwlock.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * the structure for the memory configuration for the RTE.
+ * Used by the rte_config structure. It is separated out, as for multi-process
+ * support, the memory details should be shared across instances
+ */
+struct rte_mem_config {
+	volatile uint32_t magic;   /**< Magic number - Sanity check. */
+
+	/* memory topology */
+	uint32_t nchannel;    /**< Number of channels (0 if unknown). */
+	uint32_t nrank;       /**< Number of ranks (0 if unknown). */
+
+	/**
+	 * current lock nest order
+	 *  - qlock->mlock (ring/hash/lpm)
+	 *  - mplock->qlock->mlock (mempool)
+	 * Notice:
+	 *  *ALWAYS* obtain qlock first if having to obtain both qlock and mlock
+	 */
+	rte_rwlock_t mlock;   /**< only used by memzone LIB for thread-safe. */
+	rte_rwlock_t qlock;   /**< used for tailq operation for thread safe. */
+	rte_rwlock_t mplock;  /**< only used by mempool LIB for thread-safe. */
+
+	uint32_t memzone_cnt; /**< Number of allocated memzones */
+
+	/* memory segments and zones */
+	struct rte_memseg memseg[RTE_MAX_MEMSEG];    /**< Physmem descriptors. */
+	struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
+
+	struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
+
+	/* Heaps of Malloc per socket */
+	struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+
+	/* address of mem_config in primary process. used to map shared config into
+	 * exact same address the primary process maps it.
+	 */
+	uint64_t mem_cfg_addr;
+} __attribute__((__packed__));
+
+
+inline static void
+rte_eal_mcfg_wait_complete(struct rte_mem_config* mcfg)
+{
+	/* wait until shared mem_config finish initialising */
+	while(mcfg->magic != RTE_MAGIC)
+		rte_pause();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*__RTE_EAL_MEMCONFIG_H_*/
diff --git a/lib/librte_eal/common/include/rte_errno.h b/lib/librte_eal/common/include/rte_errno.h
new file mode 100644
index 00000000..2e5cc454
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_errno.h
@@ -0,0 +1,95 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ *
+ * API for error cause tracking
+ */
+
+#ifndef _RTE_ERRNO_H_
+#define _RTE_ERRNO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_per_lcore.h>
+
+RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
+
+/**
+ * Error number value, stored per-thread, which can be queried after
+ * calls to certain functions to determine why those functions failed.
+ *
+ * Uses standard values from errno.h wherever possible, with a small number
+ * of additional possible values for RTE-specific conditions.
+ */
+#define rte_errno RTE_PER_LCORE(_rte_errno)
+
+/**
+ * Function which returns a printable string describing a particular
+ * error code. For non-RTE-specific error codes, this function returns
+ * the value from the libc strerror function.
+ *
+ * @param errnum
+ *   The error number to be looked up - generally the value of rte_errno
+ * @return
+ *   A pointer to a thread-local string containing the text describing
+ *   the error.
+ */
+const char *rte_strerror(int errnum);
+
+#ifndef __ELASTERROR
+/**
+ * Check if we have a defined value for the max system-defined errno values.
+ * if no max defined, start from 1000 to prevent overlap with standard values
+ */
+#define __ELASTERROR 1000
+#endif
+
+/** Error types */
+enum {
+	RTE_MIN_ERRNO = __ELASTERROR, /**< Start numbering above std errno vals */
+
+	E_RTE_SECONDARY, /**< Operation not allowed in secondary processes */
+	E_RTE_NO_CONFIG, /**< Missing rte_config */
+
+	RTE_MAX_ERRNO    /**< Max RTE error number */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ERRNO_H_ */
diff --git a/lib/librte_eal/common/include/rte_hexdump.h b/lib/librte_eal/common/include/rte_hexdump.h
new file mode 100644
index 00000000..5c18a50b
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_hexdump.h
@@ -0,0 +1,89 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_HEXDUMP_H_
+#define _RTE_HEXDUMP_H_
+
+/**
+ * @file
+ * Simple API to dump out memory in a special hex format.
+ */
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+* Dump out memory in a special hex dump format.
+*
+* @param f
+*		A pointer to a file for output
+* @param title
+*		If not NULL this string is printed as a header to the output.
+* @param buf
+*		This is the buffer address to print out.
+* @param len
+*		The number of bytes to dump out
+* @return
+*		None.
+*/
+
+extern void
+rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len);
+
+/**
+* Dump out memory in a hex format with colons between bytes.
+*
+* @param f
+*		A pointer to a file for output
+* @param title
+*		If not NULL this string is printed as a header to the output.
+* @param buf
+*		This is the buffer address to print out.
+* @param len
+*		The number of bytes to dump out
+* @return
+*		None.
+*/
+
+void
+rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HEXDUMP_H_ */
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
new file mode 100644
index 00000000..ff11ef3a
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -0,0 +1,120 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#define _RTE_INTERRUPTS_H_
+
+/**
+ * @file
+ *
+ * The RTE interrupt interface provides functions to register/unregister
+ * callbacks for a specific interrupt.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Interrupt handle */
+struct rte_intr_handle;
+
+/** Function to be registered for the specific interrupt */
+typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle,
+							void *cb_arg);
+
+#include <exec-env/rte_interrupts.h>
+
+/**
+ * It registers the callback for the specific interrupt. Multiple
+ * callbacks cal be registered at the same time.
+ * @param intr_handle
+ *  Pointer to the interrupt handle.
+ * @param cb
+ *  callback address.
+ * @param cb_arg
+ *  address of parameter for callback.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+				rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * It unregisters the callback according to the specified interrupt handle.
+ *
+ * @param intr_handle
+ *  pointer to the interrupt handle.
+ * @param cb
+ *  callback address.
+ * @param cb_arg
+ *  address of parameter for callback, (void *)-1 means to remove all
+ *  registered which has the same callback address.
+ *
+ * @return
+ *  - On success, return the number of callback entities removed.
+ *  - On failure, a negative value.
+ */
+int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+				rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * It enables the interrupt for the specified handle.
+ *
+ * @param intr_handle
+ *  pointer to the interrupt handle.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_intr_enable(struct rte_intr_handle *intr_handle);
+
+/**
+ * It disables the interrupt for the specified handle.
+ *
+ * @param intr_handle
+ *  pointer to the interrupt handle.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_intr_disable(struct rte_intr_handle *intr_handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
new file mode 100644
index 00000000..10dac2e0
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_keepalive.h
@@ -0,0 +1,108 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file rte_keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * @note Must be larger than the highest core id. */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+/**
+ * Keepalive failure callback.
+ *
+ *  Receives a data pointer passed to rte_keepalive_create() and the id of the
+ *  failed core.
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+	void *data,
+	const int id_core);
+
+/**
+ * Keepalive state structure.
+ * @internal
+ */
+struct rte_keepalive;
+
+/**
+ * Initialise keepalive sub-system.
+ * @param callback
+ *   Function called upon detection of a dead core.
+ * @param data
+ *   Data pointer to be passed to function callback.
+ * @return
+ *   Keepalive structure success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+	rte_keepalive_failure_callback_t callback,
+	void *data);
+
+/**
+ * Checks & handles keepalive state of monitored cores.
+ * @param *ptr_timer Triggering timer (unused)
+ * @param *ptr_data  Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
+
+/**
+ * Registers a core for keepalive checks.
+ * @param *keepcfg
+ *   Keepalive structure pointer
+ * @param id_core
+ *   ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
+	const int id_core);
+
+/**
+ * Per-core keepalive check.
+ * @param *keepcfg
+ *   Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg);
+
+#endif /* _KEEPALIVE_H_ */
diff --git a/lib/librte_eal/common/include/rte_launch.h b/lib/librte_eal/common/include/rte_launch.h
new file mode 100644
index 00000000..dd1946da
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_launch.h
@@ -0,0 +1,177 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LAUNCH_H_
+#define _RTE_LAUNCH_H_
+
+/**
+ * @file
+ *
+ * Launch tasks on other lcores
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * State of an lcore.
+ */
+enum rte_lcore_state_t {
+	WAIT,       /**< waiting a new command */
+	RUNNING,    /**< executing command */
+	FINISHED,   /**< command executed */
+};
+
+/**
+ * Definition of a remote launch function.
+ */
+typedef int (lcore_function_t)(void *);
+
+/**
+ * Launch a function on another lcore.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * Sends a message to a slave lcore (identified by the slave_id) that
+ * is in the WAIT state (this is true after the first call to
+ * rte_eal_init()). This can be checked by first calling
+ * rte_eal_wait_lcore(slave_id).
+ *
+ * When the remote lcore receives the message, it switches to
+ * the RUNNING state, then calls the function f with argument arg. Once the
+ * execution is done, the remote lcore switches to a FINISHED state and
+ * the return value of f is stored in a local variable to be read using
+ * rte_eal_wait_lcore().
+ *
+ * The MASTER lcore returns as soon as the message is sent and knows
+ * nothing about the completion of f.
+ *
+ * Note: This function is not designed to offer optimum
+ * performance. It is just a practical way to launch a function on
+ * another lcore at initialization time.
+ *
+ * @param f
+ *   The function to be called.
+ * @param arg
+ *   The argument for the function.
+ * @param slave_id
+ *   The identifier of the lcore on which the function should be executed.
+ * @return
+ *   - 0: Success. Execution of function f started on the remote lcore.
+ *   - (-EBUSY): The remote lcore is not in a WAIT state.
+ */
+int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id);
+
+/**
+ * This enum indicates whether the master core must execute the handler
+ * launched on all logical cores.
+ */
+enum rte_rmt_call_master_t {
+	SKIP_MASTER = 0, /**< lcore handler not executed by master core. */
+	CALL_MASTER,     /**< lcore handler executed by master core. */
+};
+
+/**
+ * Launch a function on all lcores.
+ *
+ * Check that each SLAVE lcore is in a WAIT state, then call
+ * rte_eal_remote_launch() for each lcore.
+ *
+ * @param f
+ *   The function to be called.
+ * @param arg
+ *   The argument for the function.
+ * @param call_master
+ *   If call_master set to SKIP_MASTER, the MASTER lcore does not call
+ *   the function. If call_master is set to CALL_MASTER, the function
+ *   is also called on master before returning. In any case, the master
+ *   lcore returns as soon as it finished its job and knows nothing
+ *   about the completion of f on the other lcores.
+ * @return
+ *   - 0: Success. Execution of function f started on all remote lcores.
+ *   - (-EBUSY): At least one remote lcore is not in a WAIT state. In this
+ *     case, no message is sent to any of the lcores.
+ */
+int rte_eal_mp_remote_launch(lcore_function_t *f, void *arg,
+			     enum rte_rmt_call_master_t call_master);
+
+/**
+ * Get the state of the lcore identified by slave_id.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * @param slave_id
+ *   The identifier of the lcore.
+ * @return
+ *   The state of the lcore.
+ */
+enum rte_lcore_state_t rte_eal_get_lcore_state(unsigned slave_id);
+
+/**
+ * Wait until an lcore finishes its job.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * If the slave lcore identified by the slave_id is in a FINISHED state,
+ * switch to the WAIT state. If the lcore is in RUNNING state, wait until
+ * the lcore finishes its job and moves to the FINISHED state.
+ *
+ * @param slave_id
+ *   The identifier of the lcore.
+ * @return
+ *   - 0: If the lcore identified by the slave_id is in a WAIT state.
+ *   - The value that was returned by the previous remote launch
+ *     function call if the lcore identified by the slave_id was in a
+ *     FINISHED or RUNNING state. In this case, it changes the state
+ *     of the lcore to WAIT.
+ */
+int rte_eal_wait_lcore(unsigned slave_id);
+
+/**
+ * Wait until all lcores finish their jobs.
+ *
+ * To be executed on the MASTER lcore only. Issue an
+ * rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ *
+ * After a call to rte_eal_mp_wait_lcore(), the caller can assume
+ * that all slave lcores are in a WAIT state.
+ */
+void rte_eal_mp_wait_lcore(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LAUNCH_H_ */
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
new file mode 100644
index 00000000..ac151302
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -0,0 +1,276 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LCORE_H_
+#define _RTE_LCORE_H_
+
+/**
+ * @file
+ *
+ * API for lcore and socket manipulation
+ *
+ */
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LCORE_ID_ANY     UINT32_MAX       /**< Any lcore. */
+
+#if defined(__linux__)
+	typedef	cpu_set_t rte_cpuset_t;
+#elif defined(__FreeBSD__)
+#include <pthread_np.h>
+	typedef cpuset_t rte_cpuset_t;
+#endif
+
+/**
+ * Structure storing internal configuration (per-lcore)
+ */
+struct lcore_config {
+	unsigned detected;         /**< true if lcore was detected */
+	pthread_t thread_id;       /**< pthread identifier */
+	int pipe_master2slave[2];  /**< communication pipe with master */
+	int pipe_slave2master[2];  /**< communication pipe with master */
+	lcore_function_t * volatile f;         /**< function to call */
+	void * volatile arg;       /**< argument of function */
+	volatile int ret;          /**< return value of function */
+	volatile enum rte_lcore_state_t state; /**< lcore state */
+	unsigned socket_id;        /**< physical socket id for this lcore */
+	unsigned core_id;          /**< core number on socket for this lcore */
+	int core_index;            /**< relative index, starting from 0 */
+	rte_cpuset_t cpuset;       /**< cpu set which the lcore affinity to */
+};
+
+/**
+ * Internal configuration (per-lcore)
+ */
+extern struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
+RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
+
+/**
+ * Return the ID of the execution unit we are running on.
+ * @return
+ *  Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ */
+static inline unsigned
+rte_lcore_id(void)
+{
+	return RTE_PER_LCORE(_lcore_id);
+}
+
+/**
+ * Get the id of the master lcore
+ *
+ * @return
+ *   the id of the master lcore
+ */
+static inline unsigned
+rte_get_master_lcore(void)
+{
+	return rte_eal_get_configuration()->master_lcore;
+}
+
+/**
+ * Return the number of execution units (lcores) on the system.
+ *
+ * @return
+ *   the number of execution units (lcores) on the system.
+ */
+static inline unsigned
+rte_lcore_count(void)
+{
+	const struct rte_config *cfg = rte_eal_get_configuration();
+	return cfg->lcore_count;
+}
+
+/**
+ * Return the index of the lcore starting from zero.
+ * The order is physical or given by command line (-l option).
+ *
+ * @param lcore_id
+ *   The targeted lcore, or -1 for the current one.
+ * @return
+ *   The relative index, or -1 if not enabled.
+ */
+static inline int
+rte_lcore_index(int lcore_id)
+{
+	if (lcore_id >= RTE_MAX_LCORE)
+		return -1;
+	if (lcore_id < 0)
+		lcore_id = rte_lcore_id();
+	return lcore_config[lcore_id].core_index;
+}
+
+/**
+ * Return the ID of the physical socket of the logical core we are
+ * running on.
+ * @return
+ *   the ID of current lcoreid's physical socket
+ */
+unsigned rte_socket_id(void);
+
+/**
+ * Get the ID of the physical socket of the specified lcore
+ *
+ * @param lcore_id
+ *   the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1.
+ * @return
+ *   the ID of lcoreid's physical socket
+ */
+static inline unsigned
+rte_lcore_to_socket_id(unsigned lcore_id)
+{
+	return lcore_config[lcore_id].socket_id;
+}
+
+/**
+ * Test if an lcore is enabled.
+ *
+ * @param lcore_id
+ *   The identifier of the lcore, which MUST be between 0 and
+ *   RTE_MAX_LCORE-1.
+ * @return
+ *   True if the given lcore is enabled; false otherwise.
+ */
+static inline int
+rte_lcore_is_enabled(unsigned lcore_id)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	if (lcore_id >= RTE_MAX_LCORE)
+		return 0;
+	return cfg->lcore_role[lcore_id] != ROLE_OFF;
+}
+
+/**
+ * Get the next enabled lcore ID.
+ *
+ * @param i
+ *   The current lcore (reference).
+ * @param skip_master
+ *   If true, do not return the ID of the master lcore.
+ * @param wrap
+ *   If true, go back to 0 when RTE_MAX_LCORE is reached; otherwise,
+ *   return RTE_MAX_LCORE.
+ * @return
+ *   The next lcore_id or RTE_MAX_LCORE if not found.
+ */
+static inline unsigned
+rte_get_next_lcore(unsigned i, int skip_master, int wrap)
+{
+	i++;
+	if (wrap)
+		i %= RTE_MAX_LCORE;
+
+	while (i < RTE_MAX_LCORE) {
+		if (!rte_lcore_is_enabled(i) ||
+		    (skip_master && (i == rte_get_master_lcore()))) {
+			i++;
+			if (wrap)
+				i %= RTE_MAX_LCORE;
+			continue;
+		}
+		break;
+	}
+	return i;
+}
+/**
+ * Macro to browse all running lcores.
+ */
+#define RTE_LCORE_FOREACH(i)						\
+	for (i = rte_get_next_lcore(-1, 0, 0);				\
+	     i<RTE_MAX_LCORE;						\
+	     i = rte_get_next_lcore(i, 0, 0))
+
+/**
+ * Macro to browse all running lcores except the master lcore.
+ */
+#define RTE_LCORE_FOREACH_SLAVE(i)					\
+	for (i = rte_get_next_lcore(-1, 1, 0);				\
+	     i<RTE_MAX_LCORE;						\
+	     i = rte_get_next_lcore(i, 1, 0))
+
+/**
+ * Set core affinity of the current thread.
+ * Support both EAL and non-EAL thread and update TLS.
+ *
+ * @param cpusetp
+ *   Point to cpu_set_t for setting current thread affinity.
+ * @return
+ *   On success, return 0; otherwise return -1;
+ */
+int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+
+/**
+ * Get core affinity of the current thread.
+ *
+ * @param cpusetp
+ *   Point to cpu_set_t for getting current thread cpu affinity.
+ *   It presumes input is not NULL, otherwise it causes panic.
+ *
+ */
+void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
+
+/**
+ * Set thread names.
+ *
+ * Macro to wrap `pthread_setname_np()` with a glibc version check.
+ * Only glibc >= 2.12 supports this feature.
+ *
+ * This macro only used for Linux, BSD does direct libc call.
+ * BSD libc version of function is `pthread_set_name_np()`.
+ */
+#if defined(__DOXYGEN__)
+#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__)
+#endif
+
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__)
+#else
+#define rte_thread_setname(...) 0
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_LCORE_H_ */
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
new file mode 100644
index 00000000..2e47e7f6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -0,0 +1,311 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LOG_H_
+#define _RTE_LOG_H_
+
+/**
+ * @file
+ *
+ * RTE Logs API
+ *
+ * This file provides a log API to RTE applications.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/** The rte_log structure. */
+struct rte_logs {
+	uint32_t type;  /**< Bitfield with enabled logs. */
+	uint32_t level; /**< Log level. */
+	FILE *file;     /**< Pointer to current FILE* for logs. */
+};
+
+/** Global log informations */
+extern struct rte_logs rte_logs;
+
+/* SDK log type */
+#define RTE_LOGTYPE_EAL     0x00000001 /**< Log related to eal. */
+#define RTE_LOGTYPE_MALLOC  0x00000002 /**< Log related to malloc. */
+#define RTE_LOGTYPE_RING    0x00000004 /**< Log related to ring. */
+#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */
+#define RTE_LOGTYPE_TIMER   0x00000010 /**< Log related to timers. */
+#define RTE_LOGTYPE_PMD     0x00000020 /**< Log related to poll mode driver. */
+#define RTE_LOGTYPE_HASH    0x00000040 /**< Log related to hash table. */
+#define RTE_LOGTYPE_LPM     0x00000080 /**< Log related to LPM. */
+#define RTE_LOGTYPE_KNI     0x00000100 /**< Log related to KNI. */
+#define RTE_LOGTYPE_ACL     0x00000200 /**< Log related to ACL. */
+#define RTE_LOGTYPE_POWER   0x00000400 /**< Log related to power. */
+#define RTE_LOGTYPE_METER   0x00000800 /**< Log related to QoS meter. */
+#define RTE_LOGTYPE_SCHED   0x00001000 /**< Log related to QoS port scheduler. */
+#define RTE_LOGTYPE_PORT    0x00002000 /**< Log related to port. */
+#define RTE_LOGTYPE_TABLE   0x00004000 /**< Log related to table. */
+#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */
+#define RTE_LOGTYPE_MBUF    0x00010000 /**< Log related to mbuf. */
+#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */
+
+/* these log types can be used in an application */
+#define RTE_LOGTYPE_USER1   0x01000000 /**< User-defined log type 1. */
+#define RTE_LOGTYPE_USER2   0x02000000 /**< User-defined log type 2. */
+#define RTE_LOGTYPE_USER3   0x04000000 /**< User-defined log type 3. */
+#define RTE_LOGTYPE_USER4   0x08000000 /**< User-defined log type 4. */
+#define RTE_LOGTYPE_USER5   0x10000000 /**< User-defined log type 5. */
+#define RTE_LOGTYPE_USER6   0x20000000 /**< User-defined log type 6. */
+#define RTE_LOGTYPE_USER7   0x40000000 /**< User-defined log type 7. */
+#define RTE_LOGTYPE_USER8   0x80000000 /**< User-defined log type 8. */
+
+/* Can't use 0, as it gives compiler warnings */
+#define RTE_LOG_EMERG    1U  /**< System is unusable.               */
+#define RTE_LOG_ALERT    2U  /**< Action must be taken immediately. */
+#define RTE_LOG_CRIT     3U  /**< Critical conditions.              */
+#define RTE_LOG_ERR      4U  /**< Error conditions.                 */
+#define RTE_LOG_WARNING  5U  /**< Warning conditions.               */
+#define RTE_LOG_NOTICE   6U  /**< Normal but significant condition. */
+#define RTE_LOG_INFO     7U  /**< Informational.                    */
+#define RTE_LOG_DEBUG    8U  /**< Debug-level messages.             */
+
+/** The default log stream. */
+extern FILE *eal_default_log_stream;
+
+/**
+ * Change the stream that will be used by the logging system.
+ *
+ * This can be done at any time. The f argument represents the stream
+ * to be used to send the logs. If f is NULL, the default output is
+ * used (stderr).
+ *
+ * @param f
+ *   Pointer to the stream.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_openlog_stream(FILE *f);
+
+/**
+ * Set the global log level.
+ *
+ * After this call, all logs that are lower or equal than level and
+ * lower or equal than the RTE_LOG_LEVEL configuration option will be
+ * displayed.
+ *
+ * @param level
+ *   Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ */
+void rte_set_log_level(uint32_t level);
+
+/**
+ * Get the global log level.
+ */
+uint32_t rte_get_log_level(void);
+
+/**
+ * Enable or disable the log type.
+ *
+ * @param type
+ *   Log type, for example, RTE_LOGTYPE_EAL.
+ * @param enable
+ *   True for enable; false for disable.
+ */
+void rte_set_log_type(uint32_t type, int enable);
+
+/**
+ * Get the global log type.
+ */
+uint32_t rte_get_log_type(void);
+
+/**
+ * Get the current loglevel for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ *   The loglevel of the message being processed.
+ */
+int rte_log_cur_msg_loglevel(void);
+
+/**
+ * Get the current logtype for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ *   The logtype of the message being processed.
+ */
+int rte_log_cur_msg_logtype(void);
+
+/**
+ * Enable or disable the history (enabled by default)
+ *
+ * @param enable
+ *   true to enable, or 0 to disable history.
+ */
+void rte_log_set_history(int enable);
+
+/**
+ * Dump the log history to a file
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_log_dump_history(FILE *f);
+
+/**
+ * Add a log message to the history.
+ *
+ * This function can be called from a user-defined log stream. It adds
+ * the given message in the history that can be dumped using
+ * rte_log_dump_history().
+ *
+ * @param buf
+ *   A data buffer containing the message to be saved in the history.
+ * @param size
+ *   The length of the data buffer.
+ * @return
+ *   - 0: Success.
+ *   - (-ENOBUFS) if there is no room to store the message.
+ */
+int rte_log_add_in_history(const char *buf, size_t size);
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable.
+ *
+ * The preferred alternative is the RTE_LOG() function because debug logs may
+ * be removed at compilation time if optimization is enabled. Moreover,
+ * logs are automatically prefixed by type when using the macro.
+ *
+ * @param level
+ *   Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ *   The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ *   The format string, as in printf(3), followed by the variable arguments
+ *   required by the format.
+ * @return
+ *   - 0: Success.
+ *   - Negative on error.
+ */
+int rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+	__attribute__((cold))
+#endif
+#endif
+	__attribute__((format(printf, 3, 4)));
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable. A trailing
+ * newline may be added if needed.
+ *
+ * The preferred alternative is the RTE_LOG() because debug logs may be
+ * removed at compilation time.
+ *
+ * @param level
+ *   Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ *   The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ *   The format string, as in printf(3), followed by the variable arguments
+ *   required by the format.
+ * @param ap
+ *   The va_list of the variable arguments required by the format.
+ * @return
+ *   - 0: Success.
+ *   - Negative on error.
+ */
+int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+	__attribute__((format(printf,3,0)));
+
+/**
+ * Generates a log message.
+ *
+ * The RTE_LOG() is equivalent to rte_log() with two differences:
+
+ * - RTE_LOG() can be used to remove debug logs at compilation time,
+ *   depending on RTE_LOG_LEVEL configuration option, and compilation
+ *   optimization level. If optimization is enabled, the tests
+ *   involving constants only are pre-computed. If compilation is done
+ *   with -O0, these tests will be done at run time.
+ * - The log level and log type names are smaller, for example:
+ *   RTE_LOG(INFO, EAL, "this is a %s", "log");
+ *
+ * @param l
+ *   Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ *   expanded by the macro, so it cannot be an integer value.
+ * @param t
+ *   The log type, for example, EAL. The short name is expanded by the
+ *   macro, so it cannot be an integer value.
+ * @param ...
+ *   The fmt string, as in printf(3), followed by the variable arguments
+ *   required by the format.
+ * @return
+ *   - 0: Success.
+ *   - Negative on error.
+ */
+#define RTE_LOG(l, t, ...)					\
+	(void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ?		\
+	 rte_log(RTE_LOG_ ## l,					\
+		 RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) :	\
+	 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LOG_H_ */
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
new file mode 100644
index 00000000..74bb78c7
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -0,0 +1,342 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MALLOC_H_
+#define _RTE_MALLOC_H_
+
+/**
+ * @file
+ * RTE Malloc. This library provides methods for dynamically allocating memory
+ * from hugepages.
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <rte_memory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ *  Structure to hold heap statistics obtained from rte_malloc_get_socket_stats function.
+ */
+struct rte_malloc_socket_stats {
+	size_t heap_totalsz_bytes; /**< Total bytes on heap */
+	size_t heap_freesz_bytes;  /**< Total free bytes on heap */
+	size_t greatest_free_size; /**< Size in bytes of largest free block */
+	unsigned free_count;       /**< Number of free elements on heap */
+	unsigned alloc_count;      /**< Number of allocated elements on heap */
+	size_t heap_allocsz_bytes; /**< Total allocated bytes on heap */
+};
+
+/**
+ * This function allocates memory from the huge-page area of memory. The memory
+ * is not cleared. In NUMA systems, the memory allocated resides on the same
+ * NUMA socket as the core that calls this function.
+ *
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align);
+
+/**
+ * Allocate zero'ed memory from the heap.
+ *
+ * Equivalent to rte_malloc() except that the memory zone is
+ * initialised with zeros. In NUMA systems, the memory allocated resides on the
+ * same NUMA socket as the core that calls this function.
+ *
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must obviously be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align);
+
+/**
+ * Replacement function for calloc(), using huge-page memory. Memory area is
+ * initialised with zeros. In NUMA systems, the memory allocated resides on the
+ * same NUMA socket as the core that calls this function.
+ *
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param num
+ *   Number of elements to be allocated.
+ * @param size
+ *   Size (in bytes) of a single element.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must obviously be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align);
+
+/**
+ * Replacement function for realloc(), using huge-page memory. Reserved area
+ * memory is resized, preserving contents. In NUMA systems, the new area
+ * resides on the same NUMA socket as the old area.
+ *
+ * @param ptr
+ *   Pointer to already allocated memory
+ * @param size
+ *   Size (in bytes) of new area. If this is 0, memory is freed.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must obviously be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the reallocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned align);
+
+/**
+ * This function allocates memory from the huge-page area of memory. The memory
+ * is not cleared.
+ *
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @param socket
+ *   NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ *   will behave the same as rte_malloc().
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned align, int socket);
+
+/**
+ * Allocate zero'ed memory from the heap.
+ *
+ * Equivalent to rte_malloc() except that the memory zone is
+ * initialised with zeros.
+ *
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ *   Size (in bytes) to be allocated.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must obviously be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @param socket
+ *   NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ *   will behave the same as rte_zmalloc().
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket);
+
+/**
+ * Replacement function for calloc(), using huge-page memory. Memory area is
+ * initialised with zeros.
+ *
+ * @param type
+ *   A string identifying the type of allocated objects (useful for debug
+ *   purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param num
+ *   Number of elements to be allocated.
+ * @param size
+ *   Size (in bytes) of a single element.
+ * @param align
+ *   If 0, the return is a pointer that is suitably aligned for any kind of
+ *   variable (in the same manner as malloc()).
+ *   Otherwise, the return is a pointer that is a multiple of *align*. In
+ *   this case, it must obviously be a power of two. (Minimum alignment is the
+ *   cacheline size, i.e. 64-bytes)
+ * @param socket
+ *   NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ *   will behave the same as rte_calloc().
+ * @return
+ *   - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ *     align is not a power of two).
+ *   - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket);
+
+/**
+ * Frees the memory space pointed to by the provided pointer.
+ *
+ * This pointer must have been returned by a previous call to
+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). The behaviour of
+ * rte_free() is undefined if the pointer does not match this requirement.
+ *
+ * If the pointer is NULL, the function does nothing.
+ *
+ * @param ptr
+ *   The pointer to memory to be freed.
+ */
+void
+rte_free(void *ptr);
+
+/**
+ * If malloc debug is enabled, check a memory block for header
+ * and trailer markers to indicate that all is well with the block.
+ * If size is non-null, also return the size of the block.
+ *
+ * @param ptr
+ *   pointer to the start of a data block, must have been returned
+ *   by a previous call to rte_malloc(), rte_zmalloc(), rte_calloc()
+ *   or rte_realloc()
+ * @param size
+ *   if non-null, and memory block pointer is valid, returns the size
+ *   of the memory block
+ * @return
+ *   -1 on error, invalid pointer passed or header and trailer markers
+ *   are missing or corrupted
+ *   0 on success
+ */
+int
+rte_malloc_validate(const void *ptr, size_t *size);
+
+/**
+ * Get heap statistics for the specified heap.
+ *
+ * @param socket
+ *   An unsigned integer specifying the socket to get heap statistics for
+ * @param socket_stats
+ *   A structure which provides memory to store statistics
+ * @return
+ *   Null on error
+ *   Pointer to structure storing statistics on success
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+		struct rte_malloc_socket_stats *socket_stats);
+
+/**
+ * Dump statistics.
+ *
+ * Dump for the specified type to the console. If the type argument is
+ * NULL, all memory types will be dumped.
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @param type
+ *   A string identifying the type of objects to dump, or NULL
+ *   to dump all objects.
+ */
+void
+rte_malloc_dump_stats(FILE *f, const char *type);
+
+/**
+ * Set the maximum amount of allocated memory for this type.
+ *
+ * This is not yet implemented
+ *
+ * @param type
+ *   A string identifying the type of allocated objects.
+ * @param max
+ *   The maximum amount of allocated bytes for this type.
+ * @return
+ *   - 0: Success.
+ *   - (-1): Error.
+ */
+int
+rte_malloc_set_limit(const char *type, size_t max);
+
+/**
+ * Return the physical address of a virtual address obtained through
+ * rte_malloc
+ *
+ * @param addr
+ *   Adress obtained from a previous rte_malloc call
+ * @return
+ *   NULL on error
+ *   otherwise return physical address of the buffer
+ */
+phys_addr_t
+rte_malloc_virt2phy(const void *addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MALLOC_H_ */
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
new file mode 100644
index 00000000..b2703562
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -0,0 +1,55 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MALLOC_HEAP_H_
+#define _RTE_MALLOC_HEAP_H_
+
+#include <stddef.h>
+#include <sys/queue.h>
+#include <rte_spinlock.h>
+#include <rte_memory.h>
+
+/* Number of free lists per heap, grouped by size. */
+#define RTE_HEAP_NUM_FREELISTS  13
+
+/**
+ * Structure to hold malloc heap
+ */
+struct malloc_heap {
+	rte_spinlock_t lock;
+	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+	unsigned alloc_count;
+	size_t total_size;
+} __rte_cache_aligned;
+
+#endif /* _RTE_MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
new file mode 100644
index 00000000..f8dbece0
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -0,0 +1,263 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMORY_H_
+#define _RTE_MEMORY_H_
+
+/**
+ * @file
+ *
+ * Memory-related RTE API.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <exec-env/rte_dom0_common.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+enum rte_page_sizes {
+	RTE_PGSIZE_4K    = 1ULL << 12,
+	RTE_PGSIZE_64K   = 1ULL << 16,
+	RTE_PGSIZE_256K  = 1ULL << 18,
+	RTE_PGSIZE_2M    = 1ULL << 21,
+	RTE_PGSIZE_16M   = 1ULL << 24,
+	RTE_PGSIZE_256M  = 1ULL << 28,
+	RTE_PGSIZE_512M  = 1ULL << 29,
+	RTE_PGSIZE_1G    = 1ULL << 30,
+	RTE_PGSIZE_4G    = 1ULL << 32,
+	RTE_PGSIZE_16G   = 1ULL << 34,
+};
+
+#define SOCKET_ID_ANY -1                    /**< Any NUMA socket. */
+#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1) /**< Cache line mask. */
+
+#define RTE_CACHE_LINE_ROUNDUP(size) \
+	(RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE))
+/**< Return the first cache-aligned value greater or equal to size. */
+
+/**< Cache line size in terms of log2 */
+#if RTE_CACHE_LINE_SIZE == 64
+#define RTE_CACHE_LINE_SIZE_LOG2 6
+#elif RTE_CACHE_LINE_SIZE == 128
+#define RTE_CACHE_LINE_SIZE_LOG2 7
+#else
+#error "Unsupported cache line size"
+#endif
+
+#define RTE_CACHE_LINE_MIN_SIZE 64	/**< Minimum Cache line size. */
+
+/**
+ * Force alignment to cache line.
+ */
+#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
+
+/**
+ * Force minimum cache line alignment.
+ */
+#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
+
+typedef uint64_t phys_addr_t; /**< Physical address definition. */
+#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+
+/**
+ * Physical memory segment descriptor.
+ */
+struct rte_memseg {
+	phys_addr_t phys_addr;      /**< Start physical address. */
+	union {
+		void *addr;         /**< Start virtual address. */
+		uint64_t addr_64;   /**< Makes sure addr is always 64 bits */
+	};
+#ifdef RTE_LIBRTE_IVSHMEM
+	phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
+#endif
+	size_t len;               /**< Length of the segment. */
+	uint64_t hugepage_sz;       /**< The pagesize of underlying memory */
+	int32_t socket_id;          /**< NUMA socket ID. */
+	uint32_t nchannel;          /**< Number of channels. */
+	uint32_t nrank;             /**< Number of ranks. */
+#ifdef RTE_LIBRTE_XEN_DOM0
+	 /**< store segment MFNs */
+	uint64_t mfn[DOM0_NUM_MEMBLOCK];
+#endif
+} __rte_packed;
+
+/**
+ * Lock page in physical memory and prevent from swapping.
+ *
+ * @param virt
+ *   The virtual address.
+ * @return
+ *   0 on success, negative on error.
+ */
+int rte_mem_lock_page(const void *virt);
+
+/**
+ * Get physical address of any mapped virtual address in the current process.
+ * It is found by browsing the /proc/self/pagemap special file.
+ * The page must be locked.
+ *
+ * @param virt
+ *   The virtual address.
+ * @return
+ *   The physical address or RTE_BAD_PHYS_ADDR on error.
+ */
+phys_addr_t rte_mem_virt2phy(const void *virt);
+
+/**
+ * Get the layout of the available physical memory.
+ *
+ * It can be useful for an application to have the full physical
+ * memory layout to decide the size of a memory zone to reserve. This
+ * table is stored in rte_config (see rte_eal_get_configuration()).
+ *
+ * @return
+ *  - On success, return a pointer to a read-only table of struct
+ *    rte_physmem_desc elements, containing the layout of all
+ *    addressable physical memory. The last element of the table
+ *    contains a NULL address.
+ *  - On error, return NULL. This should not happen since it is a fatal
+ *    error that will probably cause the entire system to panic.
+ */
+const struct rte_memseg *rte_eal_get_physmem_layout(void);
+
+/**
+ * Dump the physical memory layout to the console.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_dump_physmem_layout(FILE *f);
+
+/**
+ * Get the total amount of available physical memory.
+ *
+ * @return
+ *    The total amount of available physical memory in bytes.
+ */
+uint64_t rte_eal_get_physmem_size(void);
+
+/**
+ * Get the number of memory channels.
+ *
+ * @return
+ *   The number of memory channels on the system. The value is 0 if unknown
+ *   or not the same on all devices.
+ */
+unsigned rte_memory_get_nchannel(void);
+
+/**
+ * Get the number of memory ranks.
+ *
+ * @return
+ *   The number of memory ranks on the system. The value is 0 if unknown or
+ *   not the same on all devices.
+ */
+unsigned rte_memory_get_nrank(void);
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+
+/**< Internal use only - should DOM0 memory mapping be used */
+int rte_xen_dom0_supported(void);
+
+/**< Internal use only - phys to virt mapping for xen */
+phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t);
+
+/**
+ * Return the physical address of elt, which is an element of the pool mp.
+ *
+ * @param memseg_id
+ *   The mempool is from which memory segment.
+ * @param phy_addr
+ *   physical address of elt.
+ *
+ * @return
+ *   The physical address or error.
+ */
+static inline phys_addr_t
+rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+{
+	if (rte_xen_dom0_supported())
+		return rte_xen_mem_phy2mch(memseg_id, phy_addr);
+	else
+		return phy_addr;
+}
+
+/**
+ * Memory init for supporting application running on Xen domain0.
+ *
+ * @param void
+ *
+ * @return
+ *       0: successfully
+ *	 negative: error
+ */
+int rte_xen_dom0_memory_init(void);
+
+/**
+ * Attach to memory setments of primary process on Xen domain0.
+ *
+ * @param void
+ *
+ * @return
+ *       0: successfully
+ *       negative: error
+ */
+int rte_xen_dom0_memory_attach(void);
+#else
+static inline int rte_xen_dom0_supported(void)
+{
+	return 0;
+}
+
+static inline phys_addr_t
+rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
+{
+	return phy_addr;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMORY_H_ */
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
new file mode 100644
index 00000000..f69b5a87
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -0,0 +1,305 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMZONE_H_
+#define _RTE_MEMZONE_H_
+
+/**
+ * @file
+ * RTE Memzone
+ *
+ * The goal of the memzone allocator is to reserve contiguous
+ * portions of physical memory. These zones are identified by a name.
+ *
+ * The memzone descriptors are shared by all partitions and are
+ * located in a known place of physical memory. This zone is accessed
+ * using rte_eal_get_configuration(). The lookup (by name) of a
+ * memory zone can be done in any partition and returns the same
+ * physical address.
+ *
+ * A reserved memory zone cannot be unreserved. The reservation shall
+ * be done at initialization time only.
+ */
+
+#include <stdio.h>
+#include <rte_memory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MEMZONE_2MB            0x00000001   /**< Use 2MB pages. */
+#define RTE_MEMZONE_1GB            0x00000002   /**< Use 1GB pages. */
+#define RTE_MEMZONE_16MB           0x00000100   /**< Use 16MB pages. */
+#define RTE_MEMZONE_16GB           0x00000200   /**< Use 16GB pages. */
+#define RTE_MEMZONE_256KB          0x00010000   /**< Use 256KB pages. */
+#define RTE_MEMZONE_256MB          0x00020000   /**< Use 256MB pages. */
+#define RTE_MEMZONE_512MB          0x00040000   /**< Use 512MB pages. */
+#define RTE_MEMZONE_4GB            0x00080000   /**< Use 4GB pages. */
+#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004   /**< Use available page size */
+
+/**
+ * A structure describing a memzone, which is a contiguous portion of
+ * physical memory identified by a name.
+ */
+struct rte_memzone {
+
+#define RTE_MEMZONE_NAMESIZE 32       /**< Maximum length of memory zone name.*/
+	char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */
+
+	phys_addr_t phys_addr;            /**< Start physical address. */
+	union {
+		void *addr;                   /**< Start virtual address. */
+		uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
+	};
+#ifdef RTE_LIBRTE_IVSHMEM
+	phys_addr_t ioremap_addr;         /**< Real physical address inside the VM */
+#endif
+	size_t len;                       /**< Length of the memzone. */
+
+	uint64_t hugepage_sz;             /**< The page size of underlying memory */
+
+	int32_t socket_id;                /**< NUMA socket ID. */
+
+	uint32_t flags;                   /**< Characteristics of this memzone. */
+	uint32_t memseg_id;               /**< Memseg it belongs. */
+} __attribute__((__packed__));
+
+/**
+ * Reserve a portion of physical memory.
+ *
+ * This function reserves some memory and returns a pointer to a
+ * correctly filled memzone descriptor. If the allocation cannot be
+ * done, return NULL.
+ *
+ * @param name
+ *   The name of the memzone. If it already exists, the function will
+ *   fail and return NULL.
+ * @param len
+ *   The size of the memory to be reserved. If it
+ *   is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ *   The socket identifier in the case of
+ *   NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The flags parameter is used to request memzones to be
+ *   taken from specifically sized hugepages.
+ *   - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ *   - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ *   - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ *   - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ *   - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ *   - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ *   - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ *   - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ *   - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ *                                  the requested page size is unavailable.
+ *                                  If this flag is not set, the function
+ *                                  will return error on an unavailable size
+ *                                  request.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve(const char *name,
+					      size_t len, int socket_id,
+					      unsigned flags);
+
+/**
+ * Reserve a portion of physical memory with alignment on a specified
+ * boundary.
+ *
+ * This function reserves some memory with alignment on a specified
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If the allocation cannot be done or if the alignment
+ * is not a power of 2, returns NULL.
+ *
+ * @param name
+ *   The name of the memzone. If it already exists, the function will
+ *   fail and return NULL.
+ * @param len
+ *   The size of the memory to be reserved. If it
+ *   is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ *   The socket identifier in the case of
+ *   NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The flags parameter is used to request memzones to be
+ *   taken from specifically sized hugepages.
+ *   - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ *   - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ *   - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ *   - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ *   - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ *   - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ *   - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ *   - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ *   - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ *                                  the requested page size is unavailable.
+ *                                  If this flag is not set, the function
+ *                                  will return error on an unavailable size
+ *                                  request.
+ * @param align
+ *   Alignment for resulting memzone. Must be a power of 2.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
+			size_t len, int socket_id,
+			unsigned flags, unsigned align);
+
+/**
+ * Reserve a portion of physical memory with specified alignment and
+ * boundary.
+ *
+ * This function reserves some memory with specified alignment and
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If the allocation cannot be done or if the alignment
+ * or boundary are not a power of 2, returns NULL.
+ * Memory buffer is reserved in a way, that it wouldn't cross specified
+ * boundary. That implies that requested length should be less or equal
+ * then boundary.
+ *
+ * @param name
+ *   The name of the memzone. If it already exists, the function will
+ *   fail and return NULL.
+ * @param len
+ *   The size of the memory to be reserved. If it
+ *   is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ *   The socket identifier in the case of
+ *   NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The flags parameter is used to request memzones to be
+ *   taken from specifically sized hugepages.
+ *   - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ *   - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ *   - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ *   - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ *   - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ *   - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ *   - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ *   - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ *   - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ *                                  the requested page size is unavailable.
+ *                                  If this flag is not set, the function
+ *                                  will return error on an unavailable size
+ *                                  request.
+ * @param align
+ *   Alignment for resulting memzone. Must be a power of 2.
+ * @param bound
+ *   Boundary for resulting memzone. Must be a power of 2 or zero.
+ *   Zero value implies no boundary condition.
+ * @return
+ *   A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ *   on error.
+ *   On error case, rte_errno will be set appropriately:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
+			size_t len, int socket_id,
+			unsigned flags, unsigned align, unsigned bound);
+
+/**
+ * Free a memzone.
+ *
+ * Note: an IVSHMEM zone cannot be freed.
+ *
+ * @param mz
+ *   A pointer to the memzone
+ * @return
+ *  -EINVAL - invalid parameter, IVSHMEM memzone.
+ *  0 - success
+ */
+int rte_memzone_free(const struct rte_memzone *mz);
+
+/**
+ * Lookup for a memzone.
+ *
+ * Get a pointer to a descriptor of an already reserved memory
+ * zone identified by the name given as an argument.
+ *
+ * @param name
+ *   The name of the memzone.
+ * @return
+ *   A pointer to a read-only memzone descriptor.
+ */
+const struct rte_memzone *rte_memzone_lookup(const char *name);
+
+/**
+ * Dump all reserved memzones to the console.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_memzone_dump(FILE *f);
+
+/**
+ * Walk list of all memzones
+ *
+ * @param func
+ *   Iterator function
+ * @param arg
+ *   Argument passed to iterator
+ */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
+		      void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMZONE_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
new file mode 100644
index 00000000..e692094e
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -0,0 +1,598 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*   BSD LICENSE
+ *
+ *   Copyright 2013-2014 6WIND S.A.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_H_
+#define _RTE_PCI_H_
+
+/**
+ * @file
+ *
+ * RTE PCI Interface
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_interrupts.h>
+
+TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */
+TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */
+
+extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */
+extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */
+
+/** Pathname of PCI devices directory. */
+#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
+
+/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
+#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+
+/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
+#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+
+/** Nb. of values in PCI device identifier format string. */
+#define PCI_FMT_NVAL 4
+
+/** Nb. of values in PCI resource format. */
+#define PCI_RESOURCE_FMT_NVAL 3
+
+/** IO resource type: memory address space */
+#define IORESOURCE_MEM        0x00000200
+
+/**
+ * A structure describing a PCI resource.
+ */
+struct rte_pci_resource {
+	uint64_t phys_addr;   /**< Physical address, 0 if no resource. */
+	uint64_t len;         /**< Length of the resource. */
+	void *addr;           /**< Virtual address, NULL when not mapped. */
+};
+
+/** Maximum number of PCI resources. */
+#define PCI_MAX_RESOURCE 6
+
+/**
+ * A structure describing an ID for a PCI driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_pci_id {
+	uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
+	uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
+	uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
+	uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
+};
+
+/**
+ * A structure describing the location of a PCI device.
+ */
+struct rte_pci_addr {
+	uint16_t domain;                /**< Device domain */
+	uint8_t bus;                    /**< Device bus */
+	uint8_t devid;                  /**< Device ID */
+	uint8_t function;               /**< Device function. */
+};
+
+struct rte_devargs;
+
+enum rte_kernel_driver {
+	RTE_KDRV_UNKNOWN = 0,
+	RTE_KDRV_IGB_UIO,
+	RTE_KDRV_VFIO,
+	RTE_KDRV_UIO_GENERIC,
+	RTE_KDRV_NIC_UIO,
+	RTE_KDRV_NONE,
+};
+
+/**
+ * A structure describing a PCI device.
+ */
+struct rte_pci_device {
+	TAILQ_ENTRY(rte_pci_device) next;       /**< Next probed PCI device. */
+	struct rte_pci_addr addr;               /**< PCI location. */
+	struct rte_pci_id id;                   /**< PCI ID. */
+	struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE];   /**< PCI Memory Resource */
+	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
+	struct rte_pci_driver *driver;          /**< Associated driver */
+	uint16_t max_vfs;                       /**< sriov enable if not zero */
+	int numa_node;                          /**< NUMA node connection */
+	struct rte_devargs *devargs;            /**< Device user arguments */
+	enum rte_kernel_driver kdrv;            /**< Kernel driver passthrough */
+};
+
+/** Any PCI device identifier (vendor, device, ...) */
+#define PCI_ANY_ID (0xffff)
+
+#ifdef __cplusplus
+/** C++ macro used to help building up tables of device IDs */
+#define RTE_PCI_DEVICE(vend, dev) \
+	(vend),                   \
+	(dev),                    \
+	PCI_ANY_ID,               \
+	PCI_ANY_ID
+#else
+/** Macro used to help building up tables of device IDs */
+#define RTE_PCI_DEVICE(vend, dev)          \
+	.vendor_id = (vend),               \
+	.device_id = (dev),                \
+	.subsystem_vendor_id = PCI_ANY_ID, \
+	.subsystem_device_id = PCI_ANY_ID
+#endif
+
+struct rte_pci_driver;
+
+/**
+ * Initialisation function for the driver called during PCI probing.
+ */
+typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *);
+
+/**
+ * Uninitialisation function for the driver called during hotplugging.
+ */
+typedef int (pci_devuninit_t)(struct rte_pci_device *);
+
+/**
+ * A structure describing a PCI driver.
+ */
+struct rte_pci_driver {
+	TAILQ_ENTRY(rte_pci_driver) next;       /**< Next in list. */
+	const char *name;                       /**< Driver name. */
+	pci_devinit_t *devinit;                 /**< Device init. function. */
+	pci_devuninit_t *devuninit;             /**< Device uninit function. */
+	const struct rte_pci_id *id_table;	/**< ID table, NULL terminated. */
+	uint32_t drv_flags;                     /**< Flags contolling handling of device. */
+};
+
+/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
+#define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device driver must be registered several times until failure - deprecated */
+#pragma GCC poison RTE_PCI_DRV_MULTIPLE
+/** Device needs to be unbound even if no module is provided */
+#define RTE_PCI_DRV_FORCE_UNBIND 0x0004
+/** Device driver supports link state interrupt */
+#define RTE_PCI_DRV_INTR_LSC	0x0008
+/** Device driver supports detaching capability */
+#define RTE_PCI_DRV_DETACHABLE	0x0010
+
+/**
+ * A structure describing a PCI mapping.
+ */
+struct pci_map {
+	void *addr;
+	char *path;
+	uint64_t offset;
+	uint64_t size;
+	uint64_t phaddr;
+};
+
+/**
+ * A structure describing a mapped PCI resource.
+ * For multi-process we need to reproduce all PCI mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct mapped_pci_resource {
+	TAILQ_ENTRY(mapped_pci_resource) next;
+
+	struct rte_pci_addr pci_addr;
+	char path[PATH_MAX];
+	int nb_maps;
+	struct pci_map maps[PCI_MAX_RESOURCE];
+};
+
+/** mapped pci device list */
+TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
+
+/**< Internal use only - Macro used by pci addr parsing functions **/
+#define GET_PCIADDR_FIELD(in, fd, lim, dlm)                   \
+do {                                                               \
+	unsigned long val;                                      \
+	char *end;                                              \
+	errno = 0;                                              \
+	val = strtoul((in), &end, 16);                          \
+	if (errno != 0 || end[0] != (dlm) || val > (lim))       \
+		return -EINVAL;                                 \
+	(fd) = (typeof (fd))val;                                \
+	(in) = end + 1;                                         \
+} while(0)
+
+/**
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided without
+ * a domain prefix (i.e. domain returned is always 0)
+ *
+ * @param input
+ *	The input string to be parsed. Should have the format XX:XX.X
+ * @param dev_addr
+ *	The PCI Bus-Device-Function address to be returned. Domain will always be
+ *	returned as 0
+ * @return
+ *  0 on success, negative on error.
+ */
+static inline int
+eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
+{
+	dev_addr->domain = 0;
+	GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
+	GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
+	GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
+	return 0;
+}
+
+/**
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided including
+ * a domain prefix.
+ *
+ * @param input
+ *	The input string to be parsed. Should have the format XXXX:XX:XX.X
+ * @param dev_addr
+ *	The PCI Bus-Device-Function address to be returned
+ * @return
+ *  0 on success, negative on error.
+ */
+static inline int
+eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
+{
+	GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
+	GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
+	GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
+	GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
+	return 0;
+}
+#undef GET_PCIADDR_FIELD
+
+/* Compare two PCI device addresses. */
+/**
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address to compare
+ * @param addr2
+ *	The PCI Bus-Device-Function address to compare
+ * @return
+ *	0 on equal PCI address.
+ *	Positive on addr is greater than addr2.
+ *	Negative on addr is less than addr2, or error.
+ */
+static inline int
+rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
+			 const struct rte_pci_addr *addr2)
+{
+	uint64_t dev_addr, dev_addr2;
+
+	if ((addr == NULL) || (addr2 == NULL))
+		return -1;
+
+	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
+				(addr->devid << 8) | addr->function;
+	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
+				(addr2->devid << 8) | addr2->function;
+
+	if (dev_addr > dev_addr2)
+		return 1;
+	else if (dev_addr < dev_addr2)
+		return -1;
+	else
+		return 0;
+}
+
+/**
+ * Scan the content of the PCI bus, and the devices in the devices
+ * list
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_pci_scan(void);
+
+/**
+ * Probe the PCI bus for registered drivers.
+ *
+ * Scan the content of the PCI bus, and call the probe() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_probe(void);
+
+/**
+ * Map the PCI device resources in user space virtual memory address
+ *
+ * Note that driver should not call this function when flag
+ * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for
+ * you when it's on.
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ *
+ * @return
+ *   0 on success, negative on error and positive if no driver
+ *   is found for the device.
+ */
+int rte_eal_pci_map_device(struct rte_pci_device *dev);
+
+/**
+ * Unmap this device
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ */
+void rte_eal_pci_unmap_device(struct rte_pci_device *dev);
+
+/**
+ * @internal
+ * Map a particular resource from a file.
+ *
+ * @param requested_addr
+ *      The starting address for the new mapping range.
+ * @param fd
+ *      The file descriptor.
+ * @param offset
+ *      The offset for the mapping range.
+ * @param size
+ *      The size for the mapping range.
+ * @param additional_flags
+ *      The additional flags for the mapping range.
+ * @return
+ *   - On success, the function returns a pointer to the mapped area.
+ *   - On error, the value MAP_FAILED is returned.
+ */
+void *pci_map_resource(void *requested_addr, int fd, off_t offset,
+		size_t size, int additional_flags);
+
+/**
+ * @internal
+ * Unmap a particular resource.
+ *
+ * @param requested_addr
+ *      The address for the unmapping range.
+ * @param size
+ *      The size for the unmapping range.
+ */
+void pci_unmap_resource(void *requested_addr, size_t size);
+
+/**
+ * Probe the single PCI device.
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * address, then call the probe() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address to probe.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
+
+/**
+ * Close the single PCI device.
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * address, then call the devuninit() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address to close.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_detach(const struct rte_pci_addr *addr);
+
+/**
+ * Dump the content of the PCI bus.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_eal_pci_dump(FILE *f);
+
+/**
+ * Register a PCI driver.
+ *
+ * @param driver
+ *   A pointer to a rte_pci_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_pci_register(struct rte_pci_driver *driver);
+
+/**
+ * Unregister a PCI driver.
+ *
+ * @param driver
+ *   A pointer to a rte_pci_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_pci_unregister(struct rte_pci_driver *driver);
+
+/**
+ * Read PCI config space.
+ *
+ * @param device
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ * @param buf
+ *   A data buffer where the bytes should be read into
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into PCI config space
+ */
+int rte_eal_pci_read_config(const struct rte_pci_device *device,
+			    void *buf, size_t len, off_t offset);
+
+/**
+ * Write PCI config space.
+ *
+ * @param device
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ * @param buf
+ *   A data buffer containing the bytes should be written
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into PCI config space
+ */
+int rte_eal_pci_write_config(const struct rte_pci_device *device,
+			     const void *buf, size_t len, off_t offset);
+
+/**
+ * A structure used to access io resources for a pci device.
+ * rte_pci_ioport is arch, os, driver specific, and should not be used outside
+ * of pci ioport api.
+ */
+struct rte_pci_ioport {
+	struct rte_pci_device *dev;
+	uint64_t base;
+};
+
+/**
+ * Initialises a rte_pci_ioport object for a pci device io resource.
+ * This object is then used to gain access to those io resources (see below).
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure describing the device.
+ *   to use
+ * @param bar
+ *   Index of the io pci resource we want to access.
+ * @param p
+ *   The rte_pci_ioport object to be initialized.
+ * @return
+ *  0 on success, negative on error.
+ */
+int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
+			   struct rte_pci_ioport *p);
+
+/**
+ * Release any resources used in a rte_pci_ioport object.
+ *
+ * @param p
+ *   The rte_pci_ioport object to be uninitialized.
+ */
+int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p);
+
+/**
+ * Read from a io pci resource.
+ *
+ * @param p
+ *   The rte_pci_ioport object from which we want to read.
+ * @param data
+ *   A data buffer where the bytes should be read into
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into the pci io resource.
+ */
+void rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
+			     void *data, size_t len, off_t offset);
+
+/**
+ * Write to a io pci resource.
+ *
+ * @param p
+ *   The rte_pci_ioport object to which we want to write.
+ * @param data
+ *   A data buffer where the bytes should be read into
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into the pci io resource.
+ */
+void rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
+			      const void *data, size_t len, off_t offset);
+
+#ifdef RTE_PCI_CONFIG
+#include <rte_common.h>
+/**
+ * Set special config space registers for performance purpose.
+ * It is deprecated, as all configurations have been moved into
+ * each PMDs respectively.
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ */
+void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated;
+#endif /* RTE_PCI_CONFIG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCI_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
new file mode 100644
index 00000000..08222510
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
@@ -0,0 +1,70 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_DEV_DEFS_H_
+#define _RTE_PCI_DEV_DEFS_H_
+
+/* interrupt mode */
+enum rte_intr_mode {
+	RTE_INTR_MODE_NONE = 0,
+	RTE_INTR_MODE_LEGACY,
+	RTE_INTR_MODE_MSI,
+	RTE_INTR_MODE_MSIX
+};
+
+#endif /* _RTE_PCI_DEV_DEFS_H_ */
diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h
new file mode 100644
index 00000000..67b986a6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_dev_features.h
@@ -0,0 +1,69 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_DEV_FEATURES_H
+#define _RTE_PCI_DEV_FEATURES_H
+
+#include <rte_pci_dev_feature_defs.h>
+
+#define RTE_INTR_MODE_NONE_NAME "none"
+#define RTE_INTR_MODE_LEGACY_NAME "legacy"
+#define RTE_INTR_MODE_MSI_NAME "msi"
+#define RTE_INTR_MODE_MSIX_NAME "msix"
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h
new file mode 100644
index 00000000..cf7b5487
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h
@@ -0,0 +1,704 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * This file contains a list of the PCI device IDs recognised by DPDK, which
+ * can be used to fill out an array of structures describing the devices.
+ *
+ * Currently four families of devices are recognised: those supported by the
+ * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio
+ * driver which is a para virtualization driver running in guest virtual machine.
+ * The inclusion of these in an array built using this file depends on the
+ * definition of
+ * RTE_PCI_DEV_ID_DECL_EM
+ * RTE_PCI_DEV_ID_DECL_IGB
+ * RTE_PCI_DEV_ID_DECL_IGBVF
+ * RTE_PCI_DEV_ID_DECL_IXGBE
+ * RTE_PCI_DEV_ID_DECL_IXGBEVF
+ * RTE_PCI_DEV_ID_DECL_I40E
+ * RTE_PCI_DEV_ID_DECL_I40EVF
+ * RTE_PCI_DEV_ID_DECL_VIRTIO
+ * at the time when this file is included.
+ *
+ * In order to populate an array, the user of this file must define this macro:
+ * RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID). For example:
+ *
+ * @code
+ * struct device {
+ *     int vend;
+ *     int dev;
+ * };
+ *
+ * struct device devices[] = {
+ * #define RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID) {vend, dev},
+ * #include <rte_pci_dev_ids.h>
+ * };
+ * @endcode
+ *
+ * Note that this file can be included multiple times within the same file.
+ */
+
+#ifndef RTE_PCI_DEV_ID_DECL_EM
+#define RTE_PCI_DEV_ID_DECL_EM(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IGB
+#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IGBVF
+#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IXGBE
+#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF
+#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_I40E
+#define RTE_PCI_DEV_ID_DECL_I40E(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_I40EVF
+#define RTE_PCI_DEV_ID_DECL_I40EVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_VIRTIO
+#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_VMXNET3
+#define RTE_PCI_DEV_ID_DECL_VMXNET3(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_FM10K
+#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_FM10KVF
+#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_ENIC
+#define RTE_PCI_DEV_ID_DECL_ENIC(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_BNX2X
+#define RTE_PCI_DEV_ID_DECL_BNX2X(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_BNX2XVF
+#define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev)
+#endif
+
+#ifndef PCI_VENDOR_ID_INTEL
+/** Vendor ID used by Intel devices */
+#define PCI_VENDOR_ID_INTEL 0x8086
+#endif
+
+#ifndef PCI_VENDOR_ID_QUMRANET
+/** Vendor ID used by virtio devices */
+#define PCI_VENDOR_ID_QUMRANET 0x1AF4
+#endif
+
+#ifndef PCI_VENDOR_ID_VMWARE
+/** Vendor ID used by VMware devices */
+#define PCI_VENDOR_ID_VMWARE 0x15AD
+#endif
+
+#ifndef PCI_VENDOR_ID_CISCO
+/** Vendor ID used by Cisco VIC devices */
+#define PCI_VENDOR_ID_CISCO 0x1137
+#endif
+
+#ifndef PCI_VENDOR_ID_BROADCOM
+/** Vendor ID used by Broadcom devices */
+#define PCI_VENDOR_ID_BROADCOM 0x14E4
+#endif
+
+/******************** Physical EM devices from e1000_hw.h ********************/
+
+#define E1000_DEV_ID_82542                    0x1000
+#define E1000_DEV_ID_82543GC_FIBER            0x1001
+#define E1000_DEV_ID_82543GC_COPPER           0x1004
+#define E1000_DEV_ID_82544EI_COPPER           0x1008
+#define E1000_DEV_ID_82544EI_FIBER            0x1009
+#define E1000_DEV_ID_82544GC_COPPER           0x100C
+#define E1000_DEV_ID_82544GC_LOM              0x100D
+#define E1000_DEV_ID_82540EM                  0x100E
+#define E1000_DEV_ID_82540EM_LOM              0x1015
+#define E1000_DEV_ID_82540EP_LOM              0x1016
+#define E1000_DEV_ID_82540EP                  0x1017
+#define E1000_DEV_ID_82540EP_LP               0x101E
+#define E1000_DEV_ID_82545EM_COPPER           0x100F
+#define E1000_DEV_ID_82545EM_FIBER            0x1011
+#define E1000_DEV_ID_82545GM_COPPER           0x1026
+#define E1000_DEV_ID_82545GM_FIBER            0x1027
+#define E1000_DEV_ID_82545GM_SERDES           0x1028
+#define E1000_DEV_ID_82546EB_COPPER           0x1010
+#define E1000_DEV_ID_82546EB_FIBER            0x1012
+#define E1000_DEV_ID_82546EB_QUAD_COPPER      0x101D
+#define E1000_DEV_ID_82546GB_COPPER           0x1079
+#define E1000_DEV_ID_82546GB_FIBER            0x107A
+#define E1000_DEV_ID_82546GB_SERDES           0x107B
+#define E1000_DEV_ID_82546GB_PCIE             0x108A
+#define E1000_DEV_ID_82546GB_QUAD_COPPER      0x1099
+#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5
+#define E1000_DEV_ID_82541EI                  0x1013
+#define E1000_DEV_ID_82541EI_MOBILE           0x1018
+#define E1000_DEV_ID_82541ER_LOM              0x1014
+#define E1000_DEV_ID_82541ER                  0x1078
+#define E1000_DEV_ID_82541GI                  0x1076
+#define E1000_DEV_ID_82541GI_LF               0x107C
+#define E1000_DEV_ID_82541GI_MOBILE           0x1077
+#define E1000_DEV_ID_82547EI                  0x1019
+#define E1000_DEV_ID_82547EI_MOBILE           0x101A
+#define E1000_DEV_ID_82547GI                  0x1075
+#define E1000_DEV_ID_82571EB_COPPER           0x105E
+#define E1000_DEV_ID_82571EB_FIBER            0x105F
+#define E1000_DEV_ID_82571EB_SERDES           0x1060
+#define E1000_DEV_ID_82571EB_SERDES_DUAL      0x10D9
+#define E1000_DEV_ID_82571EB_SERDES_QUAD      0x10DA
+#define E1000_DEV_ID_82571EB_QUAD_COPPER      0x10A4
+#define E1000_DEV_ID_82571PT_QUAD_COPPER      0x10D5
+#define E1000_DEV_ID_82571EB_QUAD_FIBER       0x10A5
+#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP   0x10BC
+#define E1000_DEV_ID_82572EI_COPPER           0x107D
+#define E1000_DEV_ID_82572EI_FIBER            0x107E
+#define E1000_DEV_ID_82572EI_SERDES           0x107F
+#define E1000_DEV_ID_82572EI                  0x10B9
+#define E1000_DEV_ID_82573E                   0x108B
+#define E1000_DEV_ID_82573E_IAMT              0x108C
+#define E1000_DEV_ID_82573L                   0x109A
+#define E1000_DEV_ID_82574L                   0x10D3
+#define E1000_DEV_ID_82574LA                  0x10F6
+#define E1000_DEV_ID_82583V                   0x150C
+#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT   0x1096
+#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT   0x1098
+#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT   0x10BA
+#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT   0x10BB
+#define E1000_DEV_ID_ICH8_82567V_3            0x1501
+#define E1000_DEV_ID_ICH8_IGP_M_AMT           0x1049
+#define E1000_DEV_ID_ICH8_IGP_AMT             0x104A
+#define E1000_DEV_ID_ICH8_IGP_C               0x104B
+#define E1000_DEV_ID_ICH8_IFE                 0x104C
+#define E1000_DEV_ID_ICH8_IFE_GT              0x10C4
+#define E1000_DEV_ID_ICH8_IFE_G               0x10C5
+#define E1000_DEV_ID_ICH8_IGP_M               0x104D
+#define E1000_DEV_ID_ICH9_IGP_M               0x10BF
+#define E1000_DEV_ID_ICH9_IGP_M_AMT           0x10F5
+#define E1000_DEV_ID_ICH9_IGP_M_V             0x10CB
+#define E1000_DEV_ID_ICH9_IGP_AMT             0x10BD
+#define E1000_DEV_ID_ICH9_BM                  0x10E5
+#define E1000_DEV_ID_ICH9_IGP_C               0x294C
+#define E1000_DEV_ID_ICH9_IFE                 0x10C0
+#define E1000_DEV_ID_ICH9_IFE_GT              0x10C3
+#define E1000_DEV_ID_ICH9_IFE_G               0x10C2
+#define E1000_DEV_ID_ICH10_R_BM_LM            0x10CC
+#define E1000_DEV_ID_ICH10_R_BM_LF            0x10CD
+#define E1000_DEV_ID_ICH10_R_BM_V             0x10CE
+#define E1000_DEV_ID_ICH10_D_BM_LM            0x10DE
+#define E1000_DEV_ID_ICH10_D_BM_LF            0x10DF
+#define E1000_DEV_ID_ICH10_D_BM_V             0x1525
+
+#define E1000_DEV_ID_PCH_M_HV_LM              0x10EA
+#define E1000_DEV_ID_PCH_M_HV_LC              0x10EB
+#define E1000_DEV_ID_PCH_D_HV_DM              0x10EF
+#define E1000_DEV_ID_PCH_D_HV_DC              0x10F0
+#define E1000_DEV_ID_PCH2_LV_LM               0x1502
+#define E1000_DEV_ID_PCH2_LV_V                0x1503
+#define E1000_DEV_ID_PCH_LPT_I217_LM          0x153A
+#define E1000_DEV_ID_PCH_LPT_I217_V           0x153B
+#define E1000_DEV_ID_PCH_LPTLP_I218_LM	      0x155A
+#define E1000_DEV_ID_PCH_LPTLP_I218_V	      0x1559
+#define E1000_DEV_ID_PCH_I218_LM2             0x15A0
+#define E1000_DEV_ID_PCH_I218_V2              0x15A1
+#define E1000_DEV_ID_PCH_I218_LM3             0x15A2
+#define E1000_DEV_ID_PCH_I218_V3              0x15A3
+
+
+/*
+ * Tested (supported) on VM emulated HW.
+ */
+
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82540EM)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_FIBER)
+
+/*
+ * Tested (supported) on real HW.
+ */
+
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_COPPER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_FIBER)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_SERDES)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82573L)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574L)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574LA)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82583V)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_LM)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_V)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM2)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V2)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM3)
+RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V3)
+
+
+/******************** Physical IGB devices from e1000_hw.h ********************/
+
+#define E1000_DEV_ID_82576                      0x10C9
+#define E1000_DEV_ID_82576_FIBER                0x10E6
+#define E1000_DEV_ID_82576_SERDES               0x10E7
+#define E1000_DEV_ID_82576_QUAD_COPPER          0x10E8
+#define E1000_DEV_ID_82576_QUAD_COPPER_ET2      0x1526
+#define E1000_DEV_ID_82576_NS                   0x150A
+#define E1000_DEV_ID_82576_NS_SERDES            0x1518
+#define E1000_DEV_ID_82576_SERDES_QUAD          0x150D
+#define E1000_DEV_ID_82575EB_COPPER             0x10A7
+#define E1000_DEV_ID_82575EB_FIBER_SERDES       0x10A9
+#define E1000_DEV_ID_82575GB_QUAD_COPPER        0x10D6
+#define E1000_DEV_ID_82580_COPPER               0x150E
+#define E1000_DEV_ID_82580_FIBER                0x150F
+#define E1000_DEV_ID_82580_SERDES               0x1510
+#define E1000_DEV_ID_82580_SGMII                0x1511
+#define E1000_DEV_ID_82580_COPPER_DUAL          0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER           0x1527
+#define E1000_DEV_ID_I350_COPPER                0x1521
+#define E1000_DEV_ID_I350_FIBER                 0x1522
+#define E1000_DEV_ID_I350_SERDES                0x1523
+#define E1000_DEV_ID_I350_SGMII                 0x1524
+#define E1000_DEV_ID_I350_DA4                   0x1546
+#define E1000_DEV_ID_I210_COPPER                0x1533
+#define E1000_DEV_ID_I210_COPPER_OEM1           0x1534
+#define E1000_DEV_ID_I210_COPPER_IT             0x1535
+#define E1000_DEV_ID_I210_FIBER                 0x1536
+#define E1000_DEV_ID_I210_SERDES                0x1537
+#define E1000_DEV_ID_I210_SGMII                 0x1538
+#define E1000_DEV_ID_I210_COPPER_FLASHLESS      0x157B
+#define E1000_DEV_ID_I210_SERDES_FLASHLESS      0x157C
+#define E1000_DEV_ID_I211_COPPER                0x1539
+#define E1000_DEV_ID_I354_BACKPLANE_1GBPS       0x1F40
+#define E1000_DEV_ID_I354_SGMII                 0x1F41
+#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS     0x1F45
+#define E1000_DEV_ID_DH89XXCC_SGMII             0x0438
+#define E1000_DEV_ID_DH89XXCC_SERDES            0x043A
+#define E1000_DEV_ID_DH89XXCC_BACKPLANE         0x043C
+#define E1000_DEV_ID_DH89XXCC_SFP               0x0440
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD)
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER)
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER)
+
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE)
+RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP)
+
+/****************** Physical IXGBE devices from ixgbe_type.h ******************/
+
+#define IXGBE_DEV_ID_82598                      0x10B6
+#define IXGBE_DEV_ID_82598_BX                   0x1508
+#define IXGBE_DEV_ID_82598AF_DUAL_PORT          0x10C6
+#define IXGBE_DEV_ID_82598AF_SINGLE_PORT        0x10C7
+#define IXGBE_DEV_ID_82598AT                    0x10C8
+#define IXGBE_DEV_ID_82598AT2                   0x150B
+#define IXGBE_DEV_ID_82598EB_SFP_LOM            0x10DB
+#define IXGBE_DEV_ID_82598EB_CX4                0x10DD
+#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT        0x10EC
+#define IXGBE_DEV_ID_82598_DA_DUAL_PORT         0x10F1
+#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM      0x10E1
+#define IXGBE_DEV_ID_82598EB_XF_LR              0x10F4
+#define IXGBE_DEV_ID_82599_KX4                  0x10F7
+#define IXGBE_DEV_ID_82599_KX4_MEZZ             0x1514
+#define IXGBE_DEV_ID_82599_KR                   0x1517
+#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE      0x10F8
+#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ       0x000C
+#define IXGBE_DEV_ID_82599_CX4                  0x10F9
+#define IXGBE_DEV_ID_82599_SFP                  0x10FB
+#define IXGBE_SUBDEV_ID_82599_SFP               0x11A9
+#define IXGBE_SUBDEV_ID_82599_RNDC              0x1F72
+#define IXGBE_SUBDEV_ID_82599_560FLR            0x17D0
+#define IXGBE_SUBDEV_ID_82599_ECNA_DP           0x0470
+#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE       0x152A
+#define IXGBE_DEV_ID_82599_SFP_FCOE             0x1529
+#define IXGBE_DEV_ID_82599_SFP_EM               0x1507
+#define IXGBE_DEV_ID_82599_SFP_SF2              0x154D
+#define IXGBE_DEV_ID_82599_SFP_SF_QP            0x154A
+#define IXGBE_DEV_ID_82599_QSFP_SF_QP           0x1558
+#define IXGBE_DEV_ID_82599EN_SFP                0x1557
+#define IXGBE_DEV_ID_82599_XAUI_LOM             0x10FC
+#define IXGBE_DEV_ID_82599_T3_LOM               0x151C
+#define IXGBE_DEV_ID_82599_LS                   0x154F
+#define IXGBE_DEV_ID_X540T                      0x1528
+#define IXGBE_DEV_ID_X540T1                     0x1560
+#define IXGBE_DEV_ID_X550EM_X_SFP               0x15AC
+#define IXGBE_DEV_ID_X550EM_X_10G_T             0x15AD
+#define IXGBE_DEV_ID_X550EM_X_1G_T              0x15AE
+#define IXGBE_DEV_ID_X550T                      0x1563
+#define IXGBE_DEV_ID_X550T1                     0x15D1
+#define IXGBE_DEV_ID_X550EM_A_KR                0x15C2
+#define IXGBE_DEV_ID_X550EM_A_KR_L              0x15C3
+#define IXGBE_DEV_ID_X550EM_A_SFP_N             0x15C4
+#define IXGBE_DEV_ID_X550EM_A_1G_T              0x15C6
+#define IXGBE_DEV_ID_X550EM_A_1G_T_L            0x15C7
+#define IXGBE_DEV_ID_X550EM_A_10G_T             0x15C8
+#define IXGBE_DEV_ID_X550EM_A_QSFP              0x15CA
+#define IXGBE_DEV_ID_X550EM_A_QSFP_N            0x15CC
+#define IXGBE_DEV_ID_X550EM_A_SFP               0x15CE
+#define IXGBE_DEV_ID_X550EM_X_KX4               0x15AA
+#define IXGBE_DEV_ID_X550EM_X_KR                0x15AB
+
+#ifdef RTE_NIC_BYPASS
+#define IXGBE_DEV_ID_82599_BYPASS               0x155D
+#endif
+
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+	IXGBE_DEV_ID_82598AF_SINGLE_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+	IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+	IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
+	IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR)
+
+#ifdef RTE_NIC_BYPASS
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS)
+#endif
+
+/*************** Physical I40E devices from i40e_type.h *****************/
+
+#define I40E_DEV_ID_SFP_XL710           0x1572
+#define I40E_DEV_ID_QEMU                0x1574
+#define I40E_DEV_ID_KX_B                0x1580
+#define I40E_DEV_ID_KX_C                0x1581
+#define I40E_DEV_ID_QSFP_A              0x1583
+#define I40E_DEV_ID_QSFP_B              0x1584
+#define I40E_DEV_ID_QSFP_C              0x1585
+#define I40E_DEV_ID_10G_BASE_T          0x1586
+#define I40E_DEV_ID_20G_KR2             0x1587
+#define I40E_DEV_ID_20G_KR2_A           0x1588
+#define I40E_DEV_ID_10G_BASE_T4         0x1589
+#define I40E_DEV_ID_X722_A0             0x374C
+#define I40E_DEV_ID_KX_X722             0x37CE
+#define I40E_DEV_ID_QSFP_X722           0x37CF
+#define I40E_DEV_ID_SFP_X722            0x37D0
+#define I40E_DEV_ID_1G_BASE_T_X722      0x37D1
+#define I40E_DEV_ID_10G_BASE_T_X722     0x37D2
+
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_B)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_C)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_A)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_B)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_C)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722)
+
+/*************** Physical FM10K devices from fm10k_type.h ***************/
+
+#define FM10K_DEV_ID_PF                   0x15A4
+#define FM10K_DEV_ID_SDI_FM10420_QDA2     0x15D0
+
+RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_PF)
+RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2)
+
+/****************** Virtual IGB devices from e1000_hw.h ******************/
+
+#define E1000_DEV_ID_82576_VF                   0x10CA
+#define E1000_DEV_ID_82576_VF_HV                0x152D
+#define E1000_DEV_ID_I350_VF                    0x1520
+#define E1000_DEV_ID_I350_VF_HV                 0x152F
+
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF)
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV)
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF)
+RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV)
+
+/****************** Virtual IXGBE devices from ixgbe_type.h ******************/
+
+#define IXGBE_DEV_ID_82599_VF                   0x10ED
+#define IXGBE_DEV_ID_82599_VF_HV                0x152E
+#define IXGBE_DEV_ID_X540_VF                    0x1515
+#define IXGBE_DEV_ID_X540_VF_HV                 0x1530
+#define IXGBE_DEV_ID_X550_VF_HV                 0x1564
+#define IXGBE_DEV_ID_X550_VF                    0x1565
+#define IXGBE_DEV_ID_X550EM_A_VF                0x15C5
+#define IXGBE_DEV_ID_X550EM_A_VF_HV             0x15B4
+#define IXGBE_DEV_ID_X550EM_X_VF                0x15A8
+#define IXGBE_DEV_ID_X550EM_X_VF_HV             0x15A9
+
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF)
+RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV)
+
+/****************** Virtual I40E devices from i40e_type.h ********************/
+
+#define I40E_DEV_ID_VF                  0x154C
+#define I40E_DEV_ID_VF_HV               0x1571
+#define I40E_DEV_ID_X722_A0_VF          0x374D
+#define I40E_DEV_ID_X722_VF             0x37CD
+#define I40E_DEV_ID_X722_VF_HV          0x37D9
+
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF_HV)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0_VF)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF)
+RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF_HV)
+
+/****************** Virtio devices from virtio.h ******************/
+
+#define QUMRANET_DEV_ID_VIRTIO                  0x1000
+
+RTE_PCI_DEV_ID_DECL_VIRTIO(PCI_VENDOR_ID_QUMRANET, QUMRANET_DEV_ID_VIRTIO)
+
+/****************** VMware VMXNET3 devices ******************/
+
+#define VMWARE_DEV_ID_VMXNET3                   0x07B0
+
+RTE_PCI_DEV_ID_DECL_VMXNET3(PCI_VENDOR_ID_VMWARE, VMWARE_DEV_ID_VMXNET3)
+
+/*************** Virtual FM10K devices from fm10k_type.h ***************/
+
+#define FM10K_DEV_ID_VF                   0x15A5
+
+RTE_PCI_DEV_ID_DECL_FM10KVF(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_VF)
+
+/****************** Cisco VIC devices ******************/
+
+#define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF      0x0071  /* enet SRIOV VF */
+
+RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET)
+RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF)
+
+/****************** QLogic devices ******************/
+
+/* Broadcom/QLogic BNX2X */
+#define BNX2X_DEV_ID_57710	0x164e
+#define BNX2X_DEV_ID_57711	0x164f
+#define BNX2X_DEV_ID_57711E	0x1650
+#define BNX2X_DEV_ID_57712	0x1662
+#define BNX2X_DEV_ID_57712_MF	0x1663
+#define BNX2X_DEV_ID_57712_VF	0x166f
+#define BNX2X_DEV_ID_57713	0x1651
+#define BNX2X_DEV_ID_57713E	0x1652
+#define BNX2X_DEV_ID_57800	0x168a
+#define BNX2X_DEV_ID_57800_MF	0x16a5
+#define BNX2X_DEV_ID_57800_VF	0x16a9
+#define BNX2X_DEV_ID_57810	0x168e
+#define BNX2X_DEV_ID_57810_MF	0x16ae
+#define BNX2X_DEV_ID_57810_VF	0x16af
+#define BNX2X_DEV_ID_57811	0x163d
+#define BNX2X_DEV_ID_57811_MF	0x163e
+#define BNX2X_DEV_ID_57811_VF	0x163f
+
+#define BNX2X_DEV_ID_57840_OBS		0x168d
+#define BNX2X_DEV_ID_57840_OBS_MF	0x16ab
+#define BNX2X_DEV_ID_57840_4_10		0x16a1
+#define BNX2X_DEV_ID_57840_2_20		0x16a2
+#define BNX2X_DEV_ID_57840_MF		0x16a4
+#define BNX2X_DEV_ID_57840_VF		0x16ad
+
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800_VF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57711)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_VF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_VF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_OBS)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_4_10)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_2_20)
+RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_VF)
+#ifdef RTE_LIBRTE_BNX2X_MF_SUPPORT
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_MF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF)
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF)
+#endif
+
+/*
+ * Undef all RTE_PCI_DEV_ID_DECL_* here.
+ */
+#undef RTE_PCI_DEV_ID_DECL_BNX2X
+#undef RTE_PCI_DEV_ID_DECL_BNX2XVF
+#undef RTE_PCI_DEV_ID_DECL_EM
+#undef RTE_PCI_DEV_ID_DECL_IGB
+#undef RTE_PCI_DEV_ID_DECL_IGBVF
+#undef RTE_PCI_DEV_ID_DECL_IXGBE
+#undef RTE_PCI_DEV_ID_DECL_IXGBEVF
+#undef RTE_PCI_DEV_ID_DECL_I40E
+#undef RTE_PCI_DEV_ID_DECL_I40EVF
+#undef RTE_PCI_DEV_ID_DECL_VIRTIO
+#undef RTE_PCI_DEV_ID_DECL_VMXNET3
+#undef RTE_PCI_DEV_ID_DECL_FM10K
+#undef RTE_PCI_DEV_ID_DECL_FM10KVF
diff --git a/lib/librte_eal/common/include/rte_per_lcore.h b/lib/librte_eal/common/include/rte_per_lcore.h
new file mode 100644
index 00000000..5434729a
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_per_lcore.h
@@ -0,0 +1,79 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PER_LCORE_H_
+#define _RTE_PER_LCORE_H_
+
+/**
+ * @file
+ *
+ * Per-lcore variables in RTE
+ *
+ * This file defines an API for instantiating per-lcore "global
+ * variables" that are environment-specific. Note that in all
+ * environments, a "shared variable" is the default when you use a
+ * global variable.
+ *
+ * Parts of this are execution environment specific.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pthread.h>
+
+/**
+ * Macro to define a per lcore variable "var" of type "type", don't
+ * use keywords like "static" or "volatile" in type, just prefix the
+ * whole macro.
+ */
+#define RTE_DEFINE_PER_LCORE(type, name)			\
+	__thread __typeof__(type) per_lcore_##name
+
+/**
+ * Macro to declare an extern per lcore variable "var" of type "type"
+ */
+#define RTE_DECLARE_PER_LCORE(type, name)			\
+	extern __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Read/write the per-lcore variable value
+ */
+#define RTE_PER_LCORE(name) (per_lcore_##name)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PER_LCORE_H_ */
diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h
new file mode 100644
index 00000000..24ae8363
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_random.h
@@ -0,0 +1,91 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_RANDOM_H_
+#define _RTE_RANDOM_H_
+
+/**
+ * @file
+ *
+ * Pseudo-random Generators in RTE
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * Seed the pseudo-random generator.
+ *
+ * The generator is automatically seeded by the EAL init with a timer
+ * value. It may need to be re-seeded by the user with a real random
+ * value.
+ *
+ * @param seedval
+ *   The value of the seed.
+ */
+static inline void
+rte_srand(uint64_t seedval)
+{
+	srand48((long unsigned int)seedval);
+}
+
+/**
+ * Get a pseudo-random value.
+ *
+ * This function generates pseudo-random numbers using the linear
+ * congruential algorithm and 48-bit integer arithmetic, called twice
+ * to generate a 64-bit value.
+ *
+ * @return
+ *   A pseudo-random value between 0 and (1<<64)-1.
+ */
+static inline uint64_t
+rte_rand(void)
+{
+	uint64_t val;
+	val = lrand48();
+	val <<= 32;
+	val += lrand48();
+	return val;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_PER_LCORE_H_ */
diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h
new file mode 100644
index 00000000..cfca2f8d
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_string_fns.h
@@ -0,0 +1,81 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ *
+ * String-related functions as replacement for libc equivalents
+ */
+
+#ifndef _RTE_STRING_FNS_H_
+#define _RTE_STRING_FNS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Takes string "string" parameter and splits it at character "delim"
+ * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
+ * strtok or strsep functions, this modifies its input string, by replacing
+ * instances of "delim" with '\\0'. All resultant tokens are returned in the
+ * "tokens" array which must have enough entries to hold "maxtokens".
+ *
+ * @param string
+ *   The input string to be split into tokens
+ *
+ * @param stringlen
+ *   The max length of the input buffer
+ *
+ * @param tokens
+ *   The array to hold the pointers to the tokens in the string
+ *
+ * @param maxtokens
+ *   The number of elements in the tokens array. At most, maxtokens-1 splits
+ *   of the string will be done.
+ *
+ * @param delim
+ *   The character on which the split of the data will be done
+ *
+ * @return
+ *   The number of tokens in the tokens array.
+ */
+int
+rte_strsplit(char *string, int stringlen,
+             char **tokens, int maxtokens, char delim);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_STRING_FNS_H */
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
new file mode 100644
index 00000000..4a686e68
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -0,0 +1,162 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_TAILQ_H_
+#define _RTE_TAILQ_H_
+
+/**
+ * @file
+ *  Here defines rte_tailq APIs for only internal use
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+#include <stdio.h>
+#include <rte_debug.h>
+
+/** dummy structure type used by the rte_tailq APIs */
+struct rte_tailq_entry {
+	TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
+	void *data; /**< Pointer to the data referenced by this tailq entry */
+};
+/** dummy */
+TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry);
+
+#define RTE_TAILQ_NAMESIZE 32
+
+/**
+ * The structure defining a tailq header entry for storing
+ * in the rte_config structure in shared memory. Each tailq
+ * is identified by name.
+ * Any library storing a set of objects e.g. rings, mempools, hash-tables,
+ * is recommended to use an entry here, so as to make it easy for
+ * a multi-process app to find already-created elements in shared memory.
+ */
+struct rte_tailq_head {
+	struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */
+	char name[RTE_TAILQ_NAMESIZE];
+};
+
+struct rte_tailq_elem {
+	/**
+	 * Reference to head in shared mem, updated at init time by
+	 * rte_eal_tailqs_init()
+	 */
+	struct rte_tailq_head *head;
+	TAILQ_ENTRY(rte_tailq_elem) next;
+	const char name[RTE_TAILQ_NAMESIZE];
+};
+
+/**
+ * Return the first tailq entry casted to the right struct.
+ */
+#define RTE_TAILQ_CAST(tailq_entry, struct_name) \
+	(struct struct_name *)&(tailq_entry)->tailq_head
+
+/**
+ * Utility macro to make looking up a tailqueue for a particular struct easier.
+ *
+ * @param name
+ *   The name of tailq
+ *
+ * @param struct_name
+ *   The name of the list type we are using. (Generally this is the same as the
+ *   first parameter passed to TAILQ_HEAD macro)
+ *
+ * @return
+ *   The return value from rte_eal_tailq_lookup, typecast to the appropriate
+ *   structure pointer type.
+ *   NULL on error, since the tailq_head is the first
+ *   element in the rte_tailq_head structure.
+ */
+#define RTE_TAILQ_LOOKUP(name, struct_name) \
+	RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
+
+/**
+ * Dump tail queues to the console.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_dump_tailq(FILE *f);
+
+/**
+ * Lookup for a tail queue.
+ *
+ * Get a pointer to a tail queue header of a tail
+ * queue identified by the name given as an argument.
+ * Note: this function is not multi-thread safe, and should only be called from
+ * a single thread at a time
+ *
+ * @param name
+ *   The name of the queue.
+ * @return
+ *   A pointer to the tail queue head structure.
+ */
+struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
+
+/**
+ * Register a tail queue.
+ *
+ * Register a tail queue from shared memory.
+ * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to
+ * register tailq from the different dpdk libraries. Since this macro is a
+ * constructor, the function has no access to dpdk shared memory, so the
+ * registered tailq can not be used before call to rte_eal_init() which calls
+ * rte_eal_tailqs_init().
+ *
+ * @param t
+ *   The tailq element which contains the name of the tailq you want to
+ *   create (/retrieve when in secondary process).
+ * @return
+ *   0 on success or -1 in case of an error.
+ */
+int rte_eal_tailq_register(struct rte_tailq_elem *t);
+
+#define EAL_REGISTER_TAILQ(t) \
+void tailqinitfn_ ##t(void); \
+void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \
+{ \
+	if (rte_eal_tailq_register(&t) < 0) \
+		rte_panic("Cannot initialize tailq: %s\n", t.name); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TAILQ_H_ */
diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h
new file mode 100644
index 00000000..4b13b9c1
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_time.h
@@ -0,0 +1,122 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define NSEC_PER_SEC             1000000000L
+
+/**
+ * Structure to hold the parameters of a running cycle counter to assist
+ * in converting cycles to nanoseconds.
+ */
+struct rte_timecounter {
+	/** Last cycle counter value read. */
+	uint64_t cycle_last;
+	/** Nanoseconds count. */
+	uint64_t nsec;
+	/** Bitmask separating nanosecond and sub-nanoseconds. */
+	uint64_t nsec_mask;
+	/** Sub-nanoseconds count. */
+	uint64_t nsec_frac;
+	/** Bitmask for two's complement substraction of non-64 bit counters. */
+	uint64_t cc_mask;
+	/** Cycle to nanosecond divisor (power of two). */
+	uint32_t cc_shift;
+};
+
+/**
+ * Converts cyclecounter cycles to nanoseconds.
+ */
+static inline uint64_t
+rte_cyclecounter_cycles_to_ns(struct rte_timecounter *tc, uint64_t cycles)
+{
+	uint64_t ns;
+
+	/* Add fractional nanoseconds. */
+	ns = cycles + tc->nsec_frac;
+	tc->nsec_frac = ns & tc->nsec_mask;
+
+	/* Shift to get only nanoseconds. */
+	return ns >> tc->cc_shift;
+}
+
+/**
+ * Update the internal nanosecond count in the structure.
+ */
+static inline uint64_t
+rte_timecounter_update(struct rte_timecounter *tc, uint64_t cycle_now)
+{
+	uint64_t cycle_delta, ns_offset;
+
+	/* Calculate the delta since the last call. */
+	if (tc->cycle_last <= cycle_now)
+		cycle_delta = (cycle_now - tc->cycle_last) & tc->cc_mask;
+	else
+		/* Handle cycle counts that have wrapped around . */
+		cycle_delta = (~(tc->cycle_last - cycle_now) & tc->cc_mask) + 1;
+
+	/* Convert to nanoseconds. */
+	ns_offset = rte_cyclecounter_cycles_to_ns(tc, cycle_delta);
+
+	/* Store current cycle counter for next call. */
+	tc->cycle_last = cycle_now;
+
+	/* Update the nanosecond count. */
+	tc->nsec += ns_offset;
+
+	return tc->nsec;
+}
+
+/**
+ * Convert from timespec structure into nanosecond units.
+ */
+static inline uint64_t
+rte_timespec_to_ns(const struct timespec *ts)
+{
+	return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+/**
+ * Convert from nanosecond units into timespec structure.
+ */
+static inline struct timespec
+rte_ns_to_timespec(uint64_t nsec)
+{
+	struct timespec ts = {0, 0};
+
+	if (nsec == 0)
+		return ts;
+
+	ts.tv_sec = nsec / NSEC_PER_SEC;
+	ts.tv_nsec = nsec % NSEC_PER_SEC;
+
+	return ts;
+}
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
new file mode 100644
index 00000000..f8ea6d73
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -0,0 +1,130 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Definitions of DPDK version numbers
+ */
+
+#ifndef _RTE_VERSION_H_
+#define _RTE_VERSION_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+#include <rte_common.h>
+
+/**
+ * String that appears before the version number
+ */
+#define RTE_VER_PREFIX "DPDK"
+
+/**
+ * Major version/year number i.e. the yy in yy.mm.z
+ */
+#define RTE_VER_YEAR 16
+
+/**
+ * Minor version/month number i.e. the mm in yy.mm.z
+ */
+#define RTE_VER_MONTH 4
+
+/**
+ * Patch level number i.e. the z in yy.mm.z
+ */
+#define RTE_VER_MINOR 0
+
+/**
+ * Extra string to be appended to version number
+ */
+#define RTE_VER_SUFFIX ""
+
+/**
+ * Patch release number
+ *   0-15 = release candidates
+ *   16   = release
+ */
+#define RTE_VER_RELEASE 16
+
+/**
+ * Macro to compute a version number usable for comparisons
+ */
+#define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
+
+/**
+ * All version numbers in one to compare with RTE_VERSION_NUM()
+ */
+#define RTE_VERSION RTE_VERSION_NUM( \
+			RTE_VER_YEAR, \
+			RTE_VER_MONTH, \
+			RTE_VER_MINOR, \
+			RTE_VER_RELEASE)
+
+/**
+ * Function returning version string
+ * @return
+ *     string
+ */
+static inline const char *
+rte_version(void)
+{
+	static char version[32];
+	if (version[0] != 0)
+		return version;
+	if (strlen(RTE_VER_SUFFIX) == 0)
+		snprintf(version, sizeof(version), "%s %d.%02d.%d",
+			RTE_VER_PREFIX,
+			RTE_VER_YEAR,
+			RTE_VER_MONTH,
+			RTE_VER_MINOR);
+	else
+		snprintf(version, sizeof(version), "%s %d.%02d.%d%s%d",
+			RTE_VER_PREFIX,
+			RTE_VER_YEAR,
+			RTE_VER_MONTH,
+			RTE_VER_MINOR,
+			RTE_VER_SUFFIX,
+			RTE_VER_RELEASE < 16 ?
+				RTE_VER_RELEASE :
+				RTE_VER_RELEASE - 16);
+	return version;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_VERSION_H */
diff --git a/lib/librte_eal/common/include/rte_warnings.h b/lib/librte_eal/common/include/rte_warnings.h
new file mode 100644
index 00000000..54b545c9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_warnings.h
@@ -0,0 +1,84 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Definitions of warnings for use of various insecure functions
+ */
+
+#ifndef _RTE_WARNINGS_H_
+#define _RTE_WARNINGS_H_
+
+#ifdef RTE_INSECURE_FUNCTION_WARNING
+
+/* we need to include all used standard header files so that they appear
+ * _before_ we poison the function names.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <dirent.h>
+#endif
+
+/* the following function are deemed not fully secure for use e.g. they
+ * do not always null-terminate arguments */
+#pragma GCC poison sprintf strtok snprintf vsnprintf
+#pragma GCC poison strlen strcpy strcat
+#pragma GCC poison sscanf
+
+/* other unsafe functions may be implemented as macros so just undef them */
+#ifdef strsep
+#undef strsep
+#else
+#pragma GCC poison strsep
+#endif
+
+#ifdef strncpy
+#undef strncpy
+#else
+#pragma GCC poison strncpy
+#endif
+
+#ifdef strncat
+#undef strncat
+#else
+#pragma GCC poison strncat
+#endif
+
+#endif
+
+#endif /* RTE_WARNINGS_H */
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
new file mode 100644
index 00000000..27e9925d
--- /dev/null
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -0,0 +1,344 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+
+/*
+ * initialise a general malloc_elem header structure
+ */
+void
+malloc_elem_init(struct malloc_elem *elem,
+		struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
+{
+	elem->heap = heap;
+	elem->ms = ms;
+	elem->prev = NULL;
+	memset(&elem->free_list, 0, sizeof(elem->free_list));
+	elem->state = ELEM_FREE;
+	elem->size = size;
+	elem->pad = 0;
+	set_header(elem);
+	set_trailer(elem);
+}
+
+/*
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
+ */
+void
+malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
+{
+	malloc_elem_init(elem, prev->heap, prev->ms, 0);
+	elem->prev = prev;
+	elem->state = ELEM_BUSY; /* mark busy so its never merged */
+}
+
+/*
+ * calculate the starting point of where data of the requested size
+ * and alignment would fit in the current element. If the data doesn't
+ * fit, return NULL.
+ */
+static void *
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
+{
+	const size_t bmask = ~(bound - 1);
+	uintptr_t end_pt = (uintptr_t)elem +
+			elem->size - MALLOC_ELEM_TRAILER_LEN;
+	uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+	uintptr_t new_elem_start;
+
+	/* check boundary */
+	if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+		end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+		new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+		if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+			return NULL;
+	}
+
+	new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+
+	/* if the new start point is before the exist start, it won't fit */
+	return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
+}
+
+/*
+ * use elem_start_pt to determine if we get meet the size and
+ * alignment request from the current element
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,	unsigned align,
+		size_t bound)
+{
+	return elem_start_pt(elem, size, align, bound) != NULL;
+}
+
+/*
+ * split an existing element into two smaller elements at the given
+ * split_pt parameter.
+ */
+static void
+split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
+{
+	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
+	const size_t new_elem_size = elem->size - old_elem_size;
+
+	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
+	split_pt->prev = elem;
+	next_elem->prev = split_pt;
+	elem->size = old_elem_size;
+	set_trailer(elem);
+}
+
+/*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ *   heap->free_head[0] - (0   , 2^8]
+ *   heap->free_head[1] - (2^8 , 2^10]
+ *   heap->free_head[2] - (2^10 ,2^12]
+ *   heap->free_head[3] - (2^12, 2^14]
+ *   heap->free_head[4] - (2^14, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2   8
+#define MALLOC_LOG2_INCREMENT 2
+
+	size_t log2;
+	size_t index;
+
+	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+		return 0;
+
+	/* Find next power of 2 >= size. */
+	log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+	/* Compute freelist index, based on log2(size). */
+	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+	        MALLOC_LOG2_INCREMENT;
+
+	return index <= RTE_HEAP_NUM_FREELISTS-1?
+	        index: RTE_HEAP_NUM_FREELISTS-1;
+}
+
+/*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+	size_t idx;
+
+	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+	elem->state = ELEM_FREE;
+	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+static void
+elem_free_list_remove(struct malloc_elem *elem)
+{
+	LIST_REMOVE(elem, free_list);
+}
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ * This function is only called from malloc_heap_alloc so parameter checking
+ * is not done here, as it's done there previously.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
+{
+	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+	const size_t trailer_size = elem->size - old_elem_size - size -
+		MALLOC_ELEM_OVERHEAD;
+
+	elem_free_list_remove(elem);
+
+	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+		/* split it, too much free space after elem */
+		struct malloc_elem *new_free_elem =
+				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+		split_elem(elem, new_free_elem);
+		malloc_elem_free_list_insert(new_free_elem);
+	}
+
+	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+		/* don't split it, pad the element instead */
+		elem->state = ELEM_BUSY;
+		elem->pad = old_elem_size;
+
+		/* put a dummy header in padding, to point to real element header */
+		if (elem->pad > 0){ /* pad will be at least 64-bytes, as everything
+		                     * is cache-line aligned */
+			new_elem->pad = elem->pad;
+			new_elem->state = ELEM_PAD;
+			new_elem->size = elem->size - elem->pad;
+			set_header(new_elem);
+		}
+
+		return new_elem;
+	}
+
+	/* we are going to split the element in two. The original element
+	 * remains free, and the new element is the one allocated.
+	 * Re-insert original element, in case its new size makes it
+	 * belong on a different list.
+	 */
+	split_elem(elem, new_elem);
+	new_elem->state = ELEM_BUSY;
+	malloc_elem_free_list_insert(elem);
+
+	return new_elem;
+}
+
+/*
+ * joing two struct malloc_elem together. elem1 and elem2 must
+ * be contiguous in memory.
+ */
+static inline void
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+{
+	struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
+	elem1->size += elem2->size;
+	next->prev = elem1;
+}
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+int
+malloc_elem_free(struct malloc_elem *elem)
+{
+	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+		return -1;
+
+	rte_spinlock_lock(&(elem->heap->lock));
+	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
+	if (next->state == ELEM_FREE){
+		/* remove from free list, join to this one */
+		elem_free_list_remove(next);
+		join_elem(elem, next);
+	}
+
+	/* check if previous element is free, if so join with it and return,
+	 * need to re-insert in free list, as that element's size is changing
+	 */
+	if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
+		elem_free_list_remove(elem->prev);
+		join_elem(elem->prev, elem);
+		malloc_elem_free_list_insert(elem->prev);
+	}
+	/* otherwise add ourselves to the free list */
+	else {
+		malloc_elem_free_list_insert(elem);
+		elem->pad = 0;
+	}
+	/* decrease heap's count of allocated elements */
+	elem->heap->alloc_count--;
+	rte_spinlock_unlock(&(elem->heap->lock));
+
+	return 0;
+}
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size)
+{
+	const size_t new_size = size + MALLOC_ELEM_OVERHEAD;
+	/* if we request a smaller size, then always return ok */
+	const size_t current_size = elem->size - elem->pad;
+	if (current_size >= new_size)
+		return 0;
+
+	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
+	rte_spinlock_lock(&elem->heap->lock);
+	if (next ->state != ELEM_FREE)
+		goto err_return;
+	if (current_size + next->size < new_size)
+		goto err_return;
+
+	/* we now know the element fits, so remove from free list,
+	 * join the two
+	 */
+	elem_free_list_remove(next);
+	join_elem(elem, next);
+
+	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){
+		/* now we have a big block together. Lets cut it down a bit, by splitting */
+		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
+		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
+		split_elem(elem, split_pt);
+		malloc_elem_free_list_insert(split_pt);
+	}
+	rte_spinlock_unlock(&elem->heap->lock);
+	return 0;
+
+err_return:
+	rte_spinlock_unlock(&elem->heap->lock);
+	return -1;
+}
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
new file mode 100644
index 00000000..f04b2d1e
--- /dev/null
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -0,0 +1,192 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MALLOC_ELEM_H_
+#define MALLOC_ELEM_H_
+
+#include <rte_memory.h>
+
+/* dummy definition of struct so we can use pointers to it in malloc_elem struct */
+struct malloc_heap;
+
+enum elem_state {
+	ELEM_FREE = 0,
+	ELEM_BUSY,
+	ELEM_PAD  /* element is a padding-only header */
+};
+
+struct malloc_elem {
+	struct malloc_heap *heap;
+	struct malloc_elem *volatile prev;      /* points to prev elem in memseg */
+	LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in heap */
+	const struct rte_memseg *ms;
+	volatile enum elem_state state;
+	uint32_t pad;
+	size_t size;
+#ifdef RTE_LIBRTE_MALLOC_DEBUG
+	uint64_t header_cookie;         /* Cookie marking start of data */
+	                                /* trailer cookie at start + size */
+#endif
+} __rte_cache_aligned;
+
+#ifndef RTE_LIBRTE_MALLOC_DEBUG
+static const unsigned MALLOC_ELEM_TRAILER_LEN = 0;
+
+/* dummy function - just check if pointer is non-null */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; }
+
+/* dummy function - no header if malloc_debug is not enabled */
+static inline void
+set_header(struct malloc_elem *elem __rte_unused){ }
+
+/* dummy function - no trailer if malloc_debug is not enabled */
+static inline void
+set_trailer(struct malloc_elem *elem __rte_unused){ }
+
+
+#else
+static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE;
+
+#define MALLOC_HEADER_COOKIE   0xbadbadbadadd2e55ULL /**< Header cookie. */
+#define MALLOC_TRAILER_COOKIE  0xadd2e55badbadbadULL /**< Trailer cookie.*/
+
+/* define macros to make referencing the header and trailer cookies easier */
+#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \
+		elem->size - MALLOC_ELEM_TRAILER_LEN)))
+#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie)
+
+static inline void
+set_header(struct malloc_elem *elem)
+{
+	if (elem != NULL)
+		MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE;
+}
+
+static inline void
+set_trailer(struct malloc_elem *elem)
+{
+	if (elem != NULL)
+		MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE;
+}
+
+/* check that the header and trailer cookies are set correctly */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem)
+{
+	return elem != NULL &&
+			MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE &&
+			MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE;
+}
+
+#endif
+
+static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
+#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN)
+
+/*
+ * Given a pointer to the start of a memory block returned by malloc, get
+ * the actual malloc_elem header for that block.
+ */
+static inline struct malloc_elem *
+malloc_elem_from_data(const void *data)
+{
+	if (data == NULL)
+		return NULL;
+
+	struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN);
+	if (!malloc_elem_cookies_ok(elem))
+		return NULL;
+	return elem->state != ELEM_PAD ? elem:  RTE_PTR_SUB(elem, elem->pad);
+}
+
+/*
+ * initialise a malloc_elem header
+ */
+void
+malloc_elem_init(struct malloc_elem *elem,
+		struct malloc_heap *heap,
+		const struct rte_memseg *ms,
+		size_t size);
+
+/*
+ * initialise a dummy malloc_elem header for the end-of-memseg marker
+ */
+void
+malloc_elem_mkend(struct malloc_elem *elem,
+		struct malloc_elem *prev_free);
+
+/*
+ * return true if the current malloc_elem can hold a block of data
+ * of the requested size and with the requested alignment
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
+		unsigned align, size_t bound);
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size,
+		unsigned align, size_t bound);
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+int
+malloc_elem_free(struct malloc_elem *elem);
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size);
+
+/*
+ * Given an element size, compute its freelist index.
+ */
+size_t
+malloc_elem_free_list_index(size_t size);
+
+/*
+ * Add element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem);
+
+#endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
new file mode 100644
index 00000000..763fa324
--- /dev/null
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -0,0 +1,236 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memcpy.h>
+#include <rte_atomic.h>
+
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+static unsigned
+check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
+{
+	unsigned check_flag = 0;
+
+	if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
+		return 1;
+
+	switch (hugepage_sz) {
+	case RTE_PGSIZE_256K:
+		check_flag = RTE_MEMZONE_256KB;
+		break;
+	case RTE_PGSIZE_2M:
+		check_flag = RTE_MEMZONE_2MB;
+		break;
+	case RTE_PGSIZE_16M:
+		check_flag = RTE_MEMZONE_16MB;
+		break;
+	case RTE_PGSIZE_256M:
+		check_flag = RTE_MEMZONE_256MB;
+		break;
+	case RTE_PGSIZE_512M:
+		check_flag = RTE_MEMZONE_512MB;
+		break;
+	case RTE_PGSIZE_1G:
+		check_flag = RTE_MEMZONE_1GB;
+		break;
+	case RTE_PGSIZE_4G:
+		check_flag = RTE_MEMZONE_4GB;
+		break;
+	case RTE_PGSIZE_16G:
+		check_flag = RTE_MEMZONE_16GB;
+	}
+
+	return check_flag & flags;
+}
+
+/*
+ * Expand the heap with a memseg.
+ * This reserves the zone and sets a dummy malloc_elem header at the end
+ * to prevent overflow. The rest of the zone is added to free list as a single
+ * large free block
+ */
+static void
+malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
+{
+	/* allocate the memory block headers, one at end, one at start */
+	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
+	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
+			ms->len - MALLOC_ELEM_OVERHEAD);
+	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
+	const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
+
+	malloc_elem_init(start_elem, heap, ms, elem_size);
+	malloc_elem_mkend(end_elem, start_elem);
+	malloc_elem_free_list_insert(start_elem);
+
+	heap->total_size += elem_size;
+}
+
+/*
+ * Iterates through the freelist for a heap to find a free element
+ * which can store data of the required size and with the requested alignment.
+ * If size is 0, find the biggest available elem.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_suitable_element(struct malloc_heap *heap, size_t size,
+		unsigned flags, size_t align, size_t bound)
+{
+	size_t idx;
+	struct malloc_elem *elem, *alt_elem = NULL;
+
+	for (idx = malloc_elem_free_list_index(size);
+			idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+		for (elem = LIST_FIRST(&heap->free_head[idx]);
+				!!elem; elem = LIST_NEXT(elem, free_list)) {
+			if (malloc_elem_can_hold(elem, size, align, bound)) {
+				if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
+					return elem;
+				if (alt_elem == NULL)
+					alt_elem = elem;
+			}
+		}
+	}
+
+	if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+		return alt_elem;
+
+	return NULL;
+}
+
+/*
+ * Main function to allocate a block of memory from the heap.
+ * It locks the free list, scans it, and adds a new memseg if the
+ * scan fails. Once the new memseg is added, it re-scans and should return
+ * the new element after releasing the lock.
+ */
+void *
+malloc_heap_alloc(struct malloc_heap *heap,
+		const char *type __attribute__((unused)), size_t size, unsigned flags,
+		size_t align, size_t bound)
+{
+	struct malloc_elem *elem;
+
+	size = RTE_CACHE_LINE_ROUNDUP(size);
+	align = RTE_CACHE_LINE_ROUNDUP(align);
+
+	rte_spinlock_lock(&heap->lock);
+
+	elem = find_suitable_element(heap, size, flags, align, bound);
+	if (elem != NULL) {
+		elem = malloc_elem_alloc(elem, size, align, bound);
+		/* increase heap's count of allocated elements */
+		heap->alloc_count++;
+	}
+	rte_spinlock_unlock(&heap->lock);
+
+	return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+/*
+ * Function to retrieve data for heap on given socket
+ */
+int
+malloc_heap_get_stats(const struct malloc_heap *heap,
+		struct rte_malloc_socket_stats *socket_stats)
+{
+	size_t idx;
+	struct malloc_elem *elem;
+
+	/* Initialise variables for heap */
+	socket_stats->free_count = 0;
+	socket_stats->heap_freesz_bytes = 0;
+	socket_stats->greatest_free_size = 0;
+
+	/* Iterate through free list */
+	for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+		for (elem = LIST_FIRST(&heap->free_head[idx]);
+			!!elem; elem = LIST_NEXT(elem, free_list))
+		{
+			socket_stats->free_count++;
+			socket_stats->heap_freesz_bytes += elem->size;
+			if (elem->size > socket_stats->greatest_free_size)
+				socket_stats->greatest_free_size = elem->size;
+		}
+	}
+	/* Get stats on overall heap and allocated memory on this heap */
+	socket_stats->heap_totalsz_bytes = heap->total_size;
+	socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
+			socket_stats->heap_freesz_bytes);
+	socket_stats->alloc_count = heap->alloc_count;
+	return 0;
+}
+
+int
+rte_eal_malloc_heap_init(void)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	unsigned ms_cnt;
+	struct rte_memseg *ms;
+
+	if (mcfg == NULL)
+		return -1;
+
+	for (ms = &mcfg->memseg[0], ms_cnt = 0;
+			(ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
+			ms_cnt++, ms++) {
+#ifdef RTE_LIBRTE_IVSHMEM
+		/*
+		 * if segment has ioremap address set, it's an IVSHMEM segment and
+		 * it is not memory to allocate from.
+		 */
+		if (ms->ioremap_addr != 0)
+			continue;
+#endif
+		malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
+	}
+
+	return 0;
+}
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
new file mode 100644
index 00000000..3ccbef0f
--- /dev/null
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MALLOC_HEAP_H_
+#define MALLOC_HEAP_H_
+
+#include <rte_malloc.h>
+#include <rte_malloc_heap.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline unsigned
+malloc_get_numa_socket(void)
+{
+	unsigned socket_id = rte_socket_id();
+
+	if (socket_id == (unsigned)SOCKET_ID_ANY)
+		return 0;
+
+	return socket_id;
+}
+
+void *
+malloc_heap_alloc(struct malloc_heap *heap,	const char *type, size_t size,
+		unsigned flags, size_t align, size_t bound);
+
+int
+malloc_heap_get_stats(const struct malloc_heap *heap,
+		struct rte_malloc_socket_stats *socket_stats);
+
+int
+rte_eal_malloc_heap_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
new file mode 100644
index 00000000..23363ec1
--- /dev/null
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,149 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+
+struct rte_keepalive {
+	/** Core Liveness. */
+	enum rte_keepalive_state {
+		ALIVE = 1,
+		MISSING = 0,
+		DEAD = 2,
+		GONE = 3
+	} __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+
+	/** Last-seen-alive timestamps */
+	uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+	/**
+	 * Cores to check.
+	 * Indexed by core id, non-zero if the core should be checked.
+	 */
+	uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+	/** Dead core handler. */
+	rte_keepalive_failure_callback_t callback;
+
+	/**
+	 * Dead core handler app data.
+	 * Pointer is passed to dead core handler.
+	 */
+	void *callback_data;
+	uint64_t tsc_initial;
+	uint64_t tsc_mhz;
+};
+
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+	RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n",
+		msg,
+		((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+		/ rte_get_tsc_hz()
+	      );
+}
+
+void
+rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
+	void *ptr_data)
+{
+	struct rte_keepalive *keepcfg = ptr_data;
+	int idx_core;
+
+	for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+		if (keepcfg->active_cores[idx_core] == 0)
+			continue;
+
+		switch (keepcfg->state_flags[idx_core]) {
+		case ALIVE: /* Alive */
+			keepcfg->state_flags[idx_core] = MISSING;
+			keepcfg->last_alive[idx_core] = rte_rdtsc();
+			break;
+		case MISSING: /* MIA */
+			print_trace("Core MIA. ", keepcfg, idx_core);
+			keepcfg->state_flags[idx_core] = DEAD;
+			break;
+		case DEAD: /* Dead */
+			keepcfg->state_flags[idx_core] = GONE;
+			print_trace("Core died. ", keepcfg, idx_core);
+			if (keepcfg->callback)
+				keepcfg->callback(
+					keepcfg->callback_data,
+					idx_core
+					);
+			break;
+		case GONE: /* Buried */
+			break;
+		}
+	}
+}
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+	void *data)
+{
+	struct rte_keepalive *keepcfg;
+
+	keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE",
+		sizeof(struct rte_keepalive),
+		RTE_CACHE_LINE_SIZE);
+	if (keepcfg != NULL) {
+		keepcfg->callback = callback;
+		keepcfg->callback_data = data;
+		keepcfg->tsc_initial = rte_rdtsc();
+		keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+	}
+	return keepcfg;
+}
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+	if (id_core < RTE_KEEPALIVE_MAXCORES) {
+		keepcfg->active_cores[id_core] = 1;
+		keepcfg->last_alive[id_core] = rte_rdtsc();
+	}
+}
+
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+	keepcfg->state_flags[rte_lcore_id()] = ALIVE;
+}
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
new file mode 100644
index 00000000..47deb007
--- /dev/null
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -0,0 +1,262 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+
+#include <rte_malloc.h>
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+
+/* Free the memory space back to heap */
+void rte_free(void *addr)
+{
+	if (addr == NULL) return;
+	if (malloc_elem_free(malloc_elem_from_data(addr)) < 0)
+		rte_panic("Fatal error: Invalid memory\n");
+}
+
+/*
+ * Allocate memory on specified heap.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int socket, i;
+	void *ret;
+
+	/* return NULL if size is 0 or alignment is not power-of-2 */
+	if (size == 0 || (align && !rte_is_power_of_2(align)))
+		return NULL;
+
+	if (!rte_eal_has_hugepages())
+		socket_arg = SOCKET_ID_ANY;
+
+	if (socket_arg == SOCKET_ID_ANY)
+		socket = malloc_get_numa_socket();
+	else
+		socket = socket_arg;
+
+	/* Check socket parameter */
+	if (socket >= RTE_MAX_NUMA_NODES)
+		return NULL;
+
+	ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
+				size, 0, align == 0 ? 1 : align, 0);
+	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+		return ret;
+
+	/* try other heaps */
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		/* we already tried this one */
+		if (i == socket)
+			continue;
+
+		ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
+					size, 0, align == 0 ? 1 : align, 0);
+		if (ret != NULL)
+			return ret;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate memory on default heap.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align)
+{
+	return rte_malloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket)
+{
+	void *ptr = rte_malloc_socket(type, size, align, socket);
+
+	if (ptr != NULL)
+		memset(ptr, 0, size);
+	return ptr;
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align)
+{
+	return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket)
+{
+	return rte_zmalloc_socket(type, num * size, align, socket);
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align)
+{
+	return rte_zmalloc(type, num * size, align);
+}
+
+/*
+ * Resize allocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned align)
+{
+	if (ptr == NULL)
+		return rte_malloc(NULL, size, align);
+
+	struct malloc_elem *elem = malloc_elem_from_data(ptr);
+	if (elem == NULL)
+		rte_panic("Fatal error: memory corruption detected\n");
+
+	size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align);
+	/* check alignment matches first, and if ok, see if we can resize block */
+	if (RTE_PTR_ALIGN(ptr,align) == ptr &&
+			malloc_elem_resize(elem, size) == 0)
+		return ptr;
+
+	/* either alignment is off, or we have no room to expand,
+	 * so move data. */
+	void *new_ptr = rte_malloc(NULL, size, align);
+	if (new_ptr == NULL)
+		return NULL;
+	const unsigned old_size = elem->size - MALLOC_ELEM_OVERHEAD;
+	rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
+	rte_free(ptr);
+
+	return new_ptr;
+}
+
+int
+rte_malloc_validate(const void *ptr, size_t *size)
+{
+	const struct malloc_elem *elem = malloc_elem_from_data(ptr);
+	if (!malloc_elem_cookies_ok(elem))
+		return -1;
+	if (size != NULL)
+		*size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+	return 0;
+}
+
+/*
+ * Function to retrieve data for heap on given socket
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+		struct rte_malloc_socket_stats *socket_stats)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
+		return -1;
+
+	return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+}
+
+/*
+ * Print stats on memory type. If type is NULL, info on all types is printed
+ */
+void
+rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
+{
+	unsigned int socket;
+	struct rte_malloc_socket_stats sock_stats;
+	/* Iterate through all initialised heaps */
+	for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
+		if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
+			continue;
+
+		fprintf(f, "Socket:%u\n", socket);
+		fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
+		fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
+		fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
+		fprintf(f, "\tGreatest_free_size:%zu,\n",
+				sock_stats.greatest_free_size);
+		fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
+		fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
+	}
+	return;
+}
+
+/*
+ * TODO: Set limit to memory that can be allocated to memory type
+ */
+int
+rte_malloc_set_limit(__rte_unused const char *type,
+		__rte_unused size_t max)
+{
+	return 0;
+}
+
+/*
+ * Return the physical address of a virtual address obtained through rte_malloc
+ */
+phys_addr_t
+rte_malloc_virt2phy(const void *addr)
+{
+	const struct malloc_elem *elem = malloc_elem_from_data(addr);
+	if (elem == NULL)
+		return 0;
+	return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
+}
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
new file mode 100644
index 00000000..20d2a916
--- /dev/null
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -0,0 +1,39 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
+DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
+DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
+DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
new file mode 100644
index 00000000..e1093619
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -0,0 +1,139 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+ARCH_DIR ?= $(RTE_ARCH)
+EXPORT_MAP := rte_eal_version.map
+VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
+
+LIBABIVER := 2
+
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_ring
+CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
+CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
+CFLAGS += $(WERROR_FLAGS) -O3
+
+LDLIBS += -ldl
+LDLIBS += -lpthread
+LDLIBS += -lgcc_s
+LDLIBS += -lrt
+
+# specific to linuxapp exec-env
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c
+endif
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c
+endif
+
+# from common dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c
+
+# from arch dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c
+
+CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
+
+CFLAGS_eal.o := -D_GNU_SOURCE
+CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
+CFLAGS_eal_pci_vfio_mp_sync.o := -D_GNU_SOURCE
+CFLAGS_eal_timer.o := -D_GNU_SOURCE
+CFLAGS_eal_lcore.o := -D_GNU_SOURCE
+CFLAGS_eal_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_log.o := -D_GNU_SOURCE
+CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
+CFLAGS_eal_pci.o := -D_GNU_SOURCE
+CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
+CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
+CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
+CFLAGS_eal_common_options.o := -D_GNU_SOURCE
+CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
+CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+endif
+
+INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
+	$(addprefix include/exec-env/,$(INC))
+
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common
+DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR)
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
new file mode 100644
index 00000000..8aafd519
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -0,0 +1,936 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2012-2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+#if defined(RTE_ARCH_X86)
+#include <sys/io.h>
+#endif
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_devargs.h>
+#include <rte_common.h>
+#include <rte_version.h>
+#include <rte_atomic.h>
+#include <malloc_heap.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
+
+/* Allow the application to print its usage message too if set */
+static rte_usage_hook_t	rte_application_usage_hook = NULL;
+
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = offsetof(struct rte_mem_config, memseg),
+		.l_len = sizeof(early_mem_config.memseg),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+		.mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* used by rte_rdtsc() */
+int rte_cycles_vmware_tsc_map;
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+	return &rte_config;
+}
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	if ((f = fopen(filename, "r")) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+			__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+			__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	*val = strtoul(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return 0;
+}
+
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static void
+rte_eal_config_create(void)
+{
+	void *rte_mem_cfg_addr;
+	int retval;
+
+	const char *pathname = eal_runtime_config_path();
+
+	if (internal_config.no_shconf)
+		return;
+
+	/* map the config before hugepage address so that we don't waste a page */
+	if (internal_config.base_virtaddr != 0)
+		rte_mem_cfg_addr = (void *)
+			RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
+			sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
+	else
+		rte_mem_cfg_addr = NULL;
+
+	if (mem_cfg_fd < 0){
+		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
+		if (mem_cfg_fd < 0)
+			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+	}
+
+	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+	if (retval < 0){
+		close(mem_cfg_fd);
+		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
+	}
+
+	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+	if (retval < 0){
+		close(mem_cfg_fd);
+		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
+				"process running?\n", pathname);
+	}
+
+	rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
+				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+
+	if (rte_mem_cfg_addr == MAP_FAILED){
+		rte_panic("Cannot mmap memory for rte_config\n");
+	}
+	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+
+	/* store address of the config in the config itself so that secondary
+	 * processes could later map the config into this exact location */
+	rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+
+}
+
+/* attach to an existing shared memory config */
+static void
+rte_eal_config_attach(void)
+{
+	struct rte_mem_config *mem_config;
+
+	const char *pathname = eal_runtime_config_path();
+
+	if (internal_config.no_shconf)
+		return;
+
+	if (mem_cfg_fd < 0){
+		mem_cfg_fd = open(pathname, O_RDWR);
+		if (mem_cfg_fd < 0)
+			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
+	}
+
+	/* map it as read-only first */
+	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
+			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
+	if (mem_config == MAP_FAILED)
+		rte_panic("Cannot mmap memory for rte_config\n");
+
+	rte_config.mem_config = mem_config;
+}
+
+/* reattach the shared config at exact memory location primary process has it */
+static void
+rte_eal_config_reattach(void)
+{
+	struct rte_mem_config *mem_config;
+	void *rte_mem_cfg_addr;
+
+	if (internal_config.no_shconf)
+		return;
+
+	/* save the address primary process has mapped shared config to */
+	rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
+
+	/* unmap original config */
+	munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
+
+	/* remap the config at proper address */
+	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
+			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
+			mem_cfg_fd, 0);
+	close(mem_cfg_fd);
+	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
+		rte_panic("Cannot mmap memory for rte_config\n");
+
+	rte_config.mem_config = mem_config;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+	const char *pathname = eal_runtime_config_path();
+
+	/* if we can open the file but not get a write-lock we are a secondary
+	 * process. NOTE: if we get a file handle back, we keep that open
+	 * and don't close it to prevent a race condition between multiple opens */
+	if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+			(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+		ptype = RTE_PROC_SECONDARY;
+
+	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+	return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static void
+rte_config_init(void)
+{
+	rte_config.process_type = internal_config.process_type;
+
+	switch (rte_config.process_type){
+	case RTE_PROC_PRIMARY:
+		rte_eal_config_create();
+		break;
+	case RTE_PROC_SECONDARY:
+		rte_eal_config_attach();
+		rte_eal_mcfg_wait_complete(rte_config.mem_config);
+		rte_eal_config_reattach();
+		break;
+	case RTE_PROC_AUTO:
+	case RTE_PROC_INVALID:
+		rte_panic("Invalid process type\n");
+	}
+}
+
+/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
+static void
+eal_hugedirs_unlock(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+	{
+		/* skip uninitialized */
+		if (internal_config.hugepage_info[i].lock_descriptor < 0)
+			continue;
+		/* unlock hugepage file */
+		flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
+		close(internal_config.hugepage_info[i].lock_descriptor);
+		/* reset the field */
+		internal_config.hugepage_info[i].lock_descriptor = -1;
+	}
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+	printf("\nUsage: %s ", prgname);
+	eal_common_usage();
+	printf("EAL Linux options:\n"
+	       "  --"OPT_SOCKET_MEM"        Memory to allocate on sockets (comma separated values)\n"
+	       "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
+	       "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
+	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
+	       "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
+	       "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
+	       "  --"OPT_XEN_DOM0"          Support running on Xen dom0 without hugetlbfs\n"
+	       "\n");
+	/* Allow the application to print its usage message too if hook is set */
+	if ( rte_application_usage_hook ) {
+		printf("===== Application Usage =====\n\n");
+		rte_application_usage_hook(prgname);
+	}
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook( rte_usage_hook_t usage_func )
+{
+	rte_usage_hook_t	old_func;
+
+	/* Will be NULL on the first call to denote the last usage routine. */
+	old_func					= rte_application_usage_hook;
+	rte_application_usage_hook	= usage_func;
+
+	return old_func;
+}
+
+static int
+eal_parse_socket_mem(char *socket_mem)
+{
+	char * arg[RTE_MAX_NUMA_NODES];
+	char *end;
+	int arg_num, i, len;
+	uint64_t total_mem = 0;
+
+	len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+	if (len == SOCKET_MEM_STRLEN) {
+		RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
+		return -1;
+	}
+
+	/* all other error cases will be caught later */
+	if (!isdigit(socket_mem[len-1]))
+		return -1;
+
+	/* split the optarg into separate socket values */
+	arg_num = rte_strsplit(socket_mem, len,
+			arg, RTE_MAX_NUMA_NODES, ',');
+
+	/* if split failed, or 0 arguments */
+	if (arg_num <= 0)
+		return -1;
+
+	internal_config.force_sockets = 1;
+
+	/* parse each defined socket option */
+	errno = 0;
+	for (i = 0; i < arg_num; i++) {
+		end = NULL;
+		internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+
+		/* check for invalid input */
+		if ((errno != 0)  ||
+				(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
+			return -1;
+		internal_config.socket_mem[i] *= 1024ULL;
+		internal_config.socket_mem[i] *= 1024ULL;
+		total_mem += internal_config.socket_mem[i];
+	}
+
+	/* check if we have a positive amount of total memory */
+	if (total_mem == 0)
+		return -1;
+
+	return 0;
+}
+
+static int
+eal_parse_base_virtaddr(const char *arg)
+{
+	char *end;
+	uint64_t addr;
+
+	errno = 0;
+	addr = strtoull(arg, &end, 16);
+
+	/* check for errors */
+	if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
+		return -1;
+
+	/* make sure we don't exceed 32-bit boundary on 32-bit target */
+#ifndef RTE_ARCH_64
+	if (addr >= UINTPTR_MAX)
+		return -1;
+#endif
+
+	/* align the addr on 16M boundary, 16MB is the minimum huge page
+	 * size on IBM Power architecture. If the addr is aligned to 16MB,
+	 * it can align to 2MB for x86. So this alignment can also be used
+	 * on x86 */
+	internal_config.base_virtaddr =
+		RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M);
+
+	return 0;
+}
+
+static int
+eal_parse_vfio_intr(const char *mode)
+{
+	unsigned i;
+	static struct {
+		const char *name;
+		enum rte_intr_mode value;
+	} map[] = {
+		{ "legacy", RTE_INTR_MODE_LEGACY },
+		{ "msi", RTE_INTR_MODE_MSI },
+		{ "msix", RTE_INTR_MODE_MSIX },
+	};
+
+	for (i = 0; i < RTE_DIM(map); i++) {
+		if (!strcmp(mode, map[i].name)) {
+			internal_config.vfio_intr_mode = map[i].value;
+			return 0;
+		}
+	}
+	return -1;
+}
+
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+	uint64_t size = 0;
+	unsigned i, j;
+
+	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+		if (hpi->hugedir != NULL) {
+			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+				size += hpi->hugepage_sz * hpi->num_pages[j];
+			}
+		}
+	}
+
+	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+	int opt;
+	char **argvopt;
+	int option_index;
+	const int old_optind = optind;
+	const int old_optopt = optopt;
+	char * const old_optarg = optarg;
+
+	argvopt = argv;
+	optind = 1;
+
+	eal_reset_internal_config(&internal_config);
+
+	while ((opt = getopt_long(argc, argvopt, eal_short_options,
+				  eal_long_options, &option_index)) != EOF) {
+
+		int ret;
+
+		/* getopt is not happy, stop right now */
+		if (opt == '?')
+			break;
+
+		ret = (opt == OPT_LOG_LEVEL_NUM) ?
+			eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
+		/* common parser is not happy */
+		if (ret < 0)
+			break;
+	}
+
+	/* restore getopt lib */
+	optind = old_optind;
+	optopt = old_optopt;
+	optarg = old_optarg;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+	int opt, ret;
+	char **argvopt;
+	int option_index;
+	char *prgname = argv[0];
+	const int old_optind = optind;
+	const int old_optopt = optopt;
+	char * const old_optarg = optarg;
+
+	argvopt = argv;
+	optind = 1;
+
+	while ((opt = getopt_long(argc, argvopt, eal_short_options,
+				  eal_long_options, &option_index)) != EOF) {
+
+		/* getopt is not happy, stop right now */
+		if (opt == '?') {
+			eal_usage(prgname);
+			ret = -1;
+			goto out;
+		}
+
+		ret = eal_parse_common_option(opt, optarg, &internal_config);
+		/* common parser is not happy */
+		if (ret < 0) {
+			eal_usage(prgname);
+			ret = -1;
+			goto out;
+		}
+		/* common parser handled this option */
+		if (ret == 0)
+			continue;
+
+		switch (opt) {
+		case 'h':
+			eal_usage(prgname);
+			exit(EXIT_SUCCESS);
+
+		/* long options */
+		case OPT_XEN_DOM0_NUM:
+#ifdef RTE_LIBRTE_XEN_DOM0
+			internal_config.xen_dom0_support = 1;
+#else
+			RTE_LOG(ERR, EAL, "Can't support DPDK app "
+				"running on Dom0, please configure"
+				" RTE_LIBRTE_XEN_DOM0=y\n");
+			ret = -1;
+			goto out;
+#endif
+			break;
+
+		case OPT_HUGE_DIR_NUM:
+			internal_config.hugepage_dir = optarg;
+			break;
+
+		case OPT_FILE_PREFIX_NUM:
+			internal_config.hugefile_prefix = optarg;
+			break;
+
+		case OPT_SOCKET_MEM_NUM:
+			if (eal_parse_socket_mem(optarg) < 0) {
+				RTE_LOG(ERR, EAL, "invalid parameters for --"
+						OPT_SOCKET_MEM "\n");
+				eal_usage(prgname);
+				ret = -1;
+				goto out;
+			}
+			break;
+
+		case OPT_BASE_VIRTADDR_NUM:
+			if (eal_parse_base_virtaddr(optarg) < 0) {
+				RTE_LOG(ERR, EAL, "invalid parameter for --"
+						OPT_BASE_VIRTADDR "\n");
+				eal_usage(prgname);
+				ret = -1;
+				goto out;
+			}
+			break;
+
+		case OPT_VFIO_INTR_NUM:
+			if (eal_parse_vfio_intr(optarg) < 0) {
+				RTE_LOG(ERR, EAL, "invalid parameters for --"
+						OPT_VFIO_INTR "\n");
+				eal_usage(prgname);
+				ret = -1;
+				goto out;
+			}
+			break;
+
+		case OPT_CREATE_UIO_DEV_NUM:
+			internal_config.create_uio_dev = 1;
+			break;
+
+		default:
+			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+				RTE_LOG(ERR, EAL, "Option %c is not supported "
+					"on Linux\n", opt);
+			} else if (opt >= OPT_LONG_MIN_NUM &&
+				   opt < OPT_LONG_MAX_NUM) {
+				RTE_LOG(ERR, EAL, "Option %s is not supported "
+					"on Linux\n",
+					eal_long_options[option_index].name);
+			} else {
+				RTE_LOG(ERR, EAL, "Option %d is not supported "
+					"on Linux\n", opt);
+			}
+			eal_usage(prgname);
+			ret = -1;
+			goto out;
+		}
+	}
+
+	if (eal_adjust_config(&internal_config) != 0) {
+		ret = -1;
+		goto out;
+	}
+
+	/* sanity checks */
+	if (eal_check_common_options(&internal_config) != 0) {
+		eal_usage(prgname);
+		ret = -1;
+		goto out;
+	}
+
+	/* --xen-dom0 doesn't make sense with --socket-mem */
+	if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
+		RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
+			"together with --"OPT_XEN_DOM0"\n");
+		eal_usage(prgname);
+		ret = -1;
+		goto out;
+	}
+
+	if (optind >= 0)
+		argv[optind-1] = prgname;
+	ret = optind-1;
+
+out:
+	/* restore getopt lib */
+	optind = old_optind;
+	optopt = old_optopt;
+	optarg = old_optarg;
+
+	return ret;
+}
+
+static void
+eal_check_mem_on_local_socket(void)
+{
+	const struct rte_memseg *ms;
+	int i, socket_id;
+
+	socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
+
+	ms = rte_eal_get_physmem_layout();
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++)
+		if (ms[i].socket_id == socket_id &&
+				ms[i].len > 0)
+			return;
+
+	RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
+			"memory on local socket!\n");
+}
+
+static int
+sync_func(__attribute__((unused)) void *arg)
+{
+	return 0;
+}
+
+inline static void
+rte_eal_mcfg_complete(void)
+{
+	/* ALL shared mem_config related INIT DONE */
+	if (rte_config.process_type == RTE_PROC_PRIMARY)
+		rte_config.mem_config->magic = RTE_MAGIC;
+}
+
+/*
+ * Request iopl privilege for all RPL, returns 0 on success
+ * iopl() call is mostly for the i386 architecture. For other architectures,
+ * return -1 to indicate IO privilege can't be changed in this way.
+ */
+int
+rte_eal_iopl_init(void)
+{
+#if defined(RTE_ARCH_X86)
+	if (iopl(3) != 0)
+		return -1;
+	return 0;
+#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+	return 0; /* iopl syscall not supported for ARM/ARM64 */
+#else
+	return -1;
+#endif
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+	int i, fctret, ret;
+	pthread_t thread_id;
+	static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+	const char *logid;
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	if (!rte_atomic32_test_and_set(&run_once))
+		return -1;
+
+	logid = strrchr(argv[0], '/');
+	logid = strdup(logid ? logid + 1: argv[0]);
+
+	thread_id = pthread_self();
+
+	if (rte_eal_log_early_init() < 0)
+		rte_panic("Cannot init early logs\n");
+
+	eal_log_level_parse(argc, argv);
+
+	/* set log level as early as possible */
+	rte_set_log_level(internal_config.log_level);
+
+	if (rte_eal_cpu_init() < 0)
+		rte_panic("Cannot detect lcores\n");
+
+	fctret = eal_parse_args(argc, argv);
+	if (fctret < 0)
+		exit(1);
+
+	if (internal_config.no_hugetlbfs == 0 &&
+			internal_config.process_type != RTE_PROC_SECONDARY &&
+			internal_config.xen_dom0_support == 0 &&
+			eal_hugepage_info_init() < 0)
+		rte_panic("Cannot get hugepage information\n");
+
+	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
+		if (internal_config.no_hugetlbfs)
+			internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+		else
+			internal_config.memory = eal_get_hugepage_mem_size();
+	}
+
+	if (internal_config.vmware_tsc_map == 1) {
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+		rte_cycles_vmware_tsc_map = 1;
+		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
+				"you must have monitor_control.pseudo_perfctr = TRUE\n");
+#else
+		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
+				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
+#endif
+	}
+
+	rte_srand(rte_rdtsc());
+
+	rte_config_init();
+
+	if (rte_eal_pci_init() < 0)
+		rte_panic("Cannot init PCI\n");
+
+#ifdef RTE_LIBRTE_IVSHMEM
+	if (rte_eal_ivshmem_init() < 0)
+		rte_panic("Cannot init IVSHMEM\n");
+#endif
+
+	if (rte_eal_memory_init() < 0)
+		rte_panic("Cannot init memory\n");
+
+	/* the directories are locked during eal_hugepage_info_init */
+	eal_hugedirs_unlock();
+
+	if (rte_eal_memzone_init() < 0)
+		rte_panic("Cannot init memzone\n");
+
+	if (rte_eal_tailqs_init() < 0)
+		rte_panic("Cannot init tail queues for objects\n");
+
+#ifdef RTE_LIBRTE_IVSHMEM
+	if (rte_eal_ivshmem_obj_init() < 0)
+		rte_panic("Cannot init IVSHMEM objects\n");
+#endif
+
+	if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
+		rte_panic("Cannot init logs\n");
+
+	if (rte_eal_alarm_init() < 0)
+		rte_panic("Cannot init interrupt-handling thread\n");
+
+	if (rte_eal_timer_init() < 0)
+		rte_panic("Cannot init HPET or TSC timers\n");
+
+	eal_check_mem_on_local_socket();
+
+	if (eal_plugins_init() < 0)
+		rte_panic("Cannot init plugins\n");
+
+	eal_thread_init_master(rte_config.master_lcore);
+
+	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
+		rte_config.master_lcore, (int)thread_id, cpuset,
+		ret == 0 ? "" : "...");
+
+	if (rte_eal_dev_init() < 0)
+		rte_panic("Cannot init pmd devices\n");
+
+	if (rte_eal_intr_init() < 0)
+		rte_panic("Cannot init interrupt-handling thread\n");
+
+	RTE_LCORE_FOREACH_SLAVE(i) {
+
+		/*
+		 * create communication pipes between master thread
+		 * and children
+		 */
+		if (pipe(lcore_config[i].pipe_master2slave) < 0)
+			rte_panic("Cannot create pipe\n");
+		if (pipe(lcore_config[i].pipe_slave2master) < 0)
+			rte_panic("Cannot create pipe\n");
+
+		lcore_config[i].state = WAIT;
+
+		/* create a thread for each lcore */
+		ret = pthread_create(&lcore_config[i].thread_id, NULL,
+				     eal_thread_loop, NULL);
+		if (ret != 0)
+			rte_panic("Cannot create thread\n");
+
+		/* Set thread_name for aid in debugging. */
+		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+			"lcore-slave-%d", i);
+		ret = rte_thread_setname(lcore_config[i].thread_id,
+						thread_name);
+		if (ret != 0)
+			RTE_LOG(ERR, EAL,
+				"Cannot set name for lcore thread\n");
+	}
+
+	/*
+	 * Launch a dummy function on all slave lcores, so that master lcore
+	 * knows they are all ready when this function returns.
+	 */
+	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+	rte_eal_mp_wait_lcore();
+
+	/* Probe & Initialize PCI devices */
+	if (rte_eal_pci_probe())
+		rte_panic("Cannot probe PCI\n");
+
+	rte_eal_mcfg_complete();
+
+	return fctret;
+}
+
+/* get core role */
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned lcore_id)
+{
+	return rte_config.lcore_role[lcore_id];
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+	return rte_config.process_type;
+}
+
+int rte_eal_has_hugepages(void)
+{
+	return ! internal_config.no_hugetlbfs;
+}
+
+int
+rte_eal_check_module(const char *module_name)
+{
+	char sysfs_mod_name[PATH_MAX];
+	struct stat st;
+	int n;
+
+	if (NULL == module_name)
+		return -1;
+
+	/* Check if there is sysfs mounted */
+	if (stat("/sys/module", &st) != 0) {
+		RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n",
+			errno, strerror(errno));
+		return -1;
+	}
+
+	/* A module might be built-in, therefore try sysfs */
+	n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name);
+	if (n < 0 || n > PATH_MAX) {
+		RTE_LOG(DEBUG, EAL, "Could not format module path\n");
+		return -1;
+	}
+
+	if (stat(sysfs_mod_name, &st) != 0) {
+		RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n",
+		        sysfs_mod_name, errno, strerror(errno));
+		return 0;
+	}
+
+	/* Module has been found */
+	return 1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
new file mode 100644
index 00000000..8b042abc
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -0,0 +1,273 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include <eal_private.h>
+
+#ifndef	TFD_NONBLOCK
+#include <fcntl.h>
+#define	TFD_NONBLOCK	O_NONBLOCK
+#endif
+
+#define NS_PER_US 1000
+#define US_PER_MS 1000
+#define MS_PER_S 1000
+#define US_PER_S (US_PER_MS * MS_PER_S)
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+	LIST_ENTRY(alarm_entry) next;
+	struct timeval time;
+	rte_eal_alarm_callback cb_fn;
+	void *cb_arg;
+	volatile uint8_t executing;
+	volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static int handler_registered = 0;
+static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg);
+
+int
+rte_eal_alarm_init(void)
+{
+	intr_handle.type = RTE_INTR_HANDLE_ALARM;
+	/* create a timerfd file descriptor */
+	intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+	if (intr_handle.fd == -1)
+		goto error;
+
+	return 0;
+
+error:
+	rte_errno = errno;
+	return -1;
+}
+
+static void
+eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
+		void *arg __rte_unused)
+{
+	struct timespec now;
+	struct alarm_entry *ap;
+
+	rte_spinlock_lock(&alarm_list_lk);
+	while ((ap = LIST_FIRST(&alarm_list)) !=NULL &&
+			clock_gettime(CLOCK_TYPE_ID, &now) == 0 &&
+			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
+						(ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) {
+		ap->executing = 1;
+		ap->executing_id = pthread_self();
+		rte_spinlock_unlock(&alarm_list_lk);
+
+		ap->cb_fn(ap->cb_arg);
+
+		rte_spinlock_lock(&alarm_list_lk);
+
+		LIST_REMOVE(ap, next);
+		rte_free(ap);
+	}
+
+	if (!LIST_EMPTY(&alarm_list)) {
+		struct itimerspec atime = { .it_interval = { 0, 0 } };
+
+		ap = LIST_FIRST(&alarm_list);
+		atime.it_value.tv_sec = ap->time.tv_sec;
+		atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US;
+		/* perform borrow for subtraction if necessary */
+		if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US))
+			atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US;
+
+		atime.it_value.tv_sec -= now.tv_sec;
+		atime.it_value.tv_nsec -= now.tv_nsec;
+		timerfd_settime(intr_handle.fd, 0, &atime, NULL);
+	}
+	rte_spinlock_unlock(&alarm_list_lk);
+}
+
+int
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+	struct timespec now;
+	int ret = 0;
+	struct alarm_entry *ap, *new_alarm;
+
+	/* Check parameters, including that us won't cause a uint64_t overflow */
+	if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+		return -EINVAL;
+
+	new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
+	if (new_alarm == NULL)
+		return -ENOMEM;
+
+	/* use current time to calculate absolute time of alarm */
+	clock_gettime(CLOCK_TYPE_ID, &now);
+
+	new_alarm->cb_fn = cb_fn;
+	new_alarm->cb_arg = cb_arg;
+	new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S;
+	new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S);
+
+	rte_spinlock_lock(&alarm_list_lk);
+	if (!handler_registered) {
+		ret |= rte_intr_callback_register(&intr_handle,
+				eal_alarm_callback, NULL);
+		handler_registered = (ret == 0) ? 1 : 0;
+	}
+
+	if (LIST_EMPTY(&alarm_list))
+		LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+	else {
+		LIST_FOREACH(ap, &alarm_list, next) {
+			if (ap->time.tv_sec > new_alarm->time.tv_sec ||
+					(ap->time.tv_sec == new_alarm->time.tv_sec &&
+							ap->time.tv_usec > new_alarm->time.tv_usec)){
+				LIST_INSERT_BEFORE(ap, new_alarm, next);
+				break;
+			}
+			if (LIST_NEXT(ap, next) == NULL) {
+				LIST_INSERT_AFTER(ap, new_alarm, next);
+				break;
+			}
+		}
+	}
+
+	if (LIST_FIRST(&alarm_list) == new_alarm) {
+		struct itimerspec alarm_time = {
+			.it_interval = {0, 0},
+			.it_value = {
+				.tv_sec = us / US_PER_S,
+				.tv_nsec = (us % US_PER_S) * NS_PER_US,
+			},
+		};
+		ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL);
+	}
+	rte_spinlock_unlock(&alarm_list_lk);
+
+	return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+	struct alarm_entry *ap, *ap_prev;
+	int count = 0;
+	int err = 0;
+	int executing;
+
+	if (!cb_fn) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	do {
+		executing = 0;
+		rte_spinlock_lock(&alarm_list_lk);
+		/* remove any matches at the start of the list */
+		while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
+				cb_fn == ap->cb_fn &&
+				(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+			if (ap->executing == 0) {
+				LIST_REMOVE(ap, next);
+				rte_free(ap);
+				count++;
+			} else {
+				/* If calling from other context, mark that alarm is executing
+				 * so loop can spin till it finish. Otherwise we are trying to
+				 * cancel our self - mark it by EINPROGRESS */
+				if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+					executing++;
+				else
+					err = EINPROGRESS;
+
+				break;
+			}
+		}
+		ap_prev = ap;
+
+		/* now go through list, removing entries not at start */
+		LIST_FOREACH(ap, &alarm_list, next) {
+			/* this won't be true first time through */
+			if (cb_fn == ap->cb_fn &&
+					(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+				if (ap->executing == 0) {
+					LIST_REMOVE(ap, next);
+					rte_free(ap);
+					count++;
+					ap = ap_prev;
+				} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+					executing++;
+				else
+					err = EINPROGRESS;
+			}
+			ap_prev = ap;
+		}
+		rte_spinlock_unlock(&alarm_list_lk);
+	} while (executing != 0);
+
+	if (count == 0 && err == 0)
+		rte_errno = ENOENT;
+	else if (err)
+		rte_errno = err;
+
+	return count;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c
new file mode 100644
index 00000000..907fbfa7
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_debug.c
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <execinfo.h>
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+	void *func[BACKTRACE_SIZE];
+	char **symb = NULL;
+	int size;
+
+	size = backtrace(func, BACKTRACE_SIZE);
+	symb = backtrace_symbols(func, size);
+
+	if (symb == NULL)
+		return;
+
+	while (size > 0) {
+		rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+			"%d: [%s]\n", size, symb[size - 1]);
+		size --;
+	}
+
+	free(symb);
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+	return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+	va_list ap;
+
+	/* disable history */
+	rte_log_set_history(0);
+
+	rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+	va_start(ap, format);
+	rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+	va_end(ap);
+	rte_dump_stack();
+	rte_dump_registers();
+	abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+	va_list ap;
+
+	/* disable history */
+	rte_log_set_history(0);
+
+	if (exit_code != 0)
+		RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
+				"  Cause: ", exit_code);
+
+	va_start(ap, format);
+	rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+	va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+	exit(exit_code);
+#else
+	rte_dump_stack();
+	rte_dump_registers();
+	abort();
+#endif
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
new file mode 100644
index 00000000..18858e2d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -0,0 +1,365 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_common.h>
+#include "rte_string_fns.h"
+#include "eal_internal_cfg.h"
+#include "eal_hugepages.h"
+#include "eal_filesystem.h"
+
+static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
+
+/* this function is only called from eal_hugepage_info_init which itself
+ * is only called from a primary process */
+static uint32_t
+get_num_hugepages(const char *subdir)
+{
+	char path[PATH_MAX];
+	long unsigned resv_pages, num_pages = 0;
+	const char *nr_hp_file = "free_hugepages";
+	const char *nr_rsvd_file = "resv_hugepages";
+
+	/* first, check how many reserved pages kernel reports */
+	snprintf(path, sizeof(path), "%s/%s/%s",
+			sys_dir_path, subdir, nr_rsvd_file);
+	if (eal_parse_sysfs_value(path, &resv_pages) < 0)
+		return 0;
+
+	snprintf(path, sizeof(path), "%s/%s/%s",
+			sys_dir_path, subdir, nr_hp_file);
+	if (eal_parse_sysfs_value(path, &num_pages) < 0)
+		return 0;
+
+	if (num_pages == 0)
+		RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n",
+				subdir);
+
+	/* adjust num_pages */
+	if (num_pages >= resv_pages)
+		num_pages -= resv_pages;
+	else if (resv_pages)
+		num_pages = 0;
+
+	/* we want to return a uint32_t and more than this looks suspicious
+	 * anyway ... */
+	if (num_pages > UINT32_MAX)
+		num_pages = UINT32_MAX;
+
+	return num_pages;
+}
+
+static uint64_t
+get_default_hp_size(void)
+{
+	const char proc_meminfo[] = "/proc/meminfo";
+	const char str_hugepagesz[] = "Hugepagesize:";
+	unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1;
+	char buffer[256];
+	unsigned long long size = 0;
+
+	FILE *fd = fopen(proc_meminfo, "r");
+	if (fd == NULL)
+		rte_panic("Cannot open %s\n", proc_meminfo);
+	while(fgets(buffer, sizeof(buffer), fd)){
+		if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){
+			size = rte_str_to_size(&buffer[hugepagesz_len]);
+			break;
+		}
+	}
+	fclose(fd);
+	if (size == 0)
+		rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo);
+	return size;
+}
+
+static const char *
+get_hugepage_dir(uint64_t hugepage_sz)
+{
+	enum proc_mount_fieldnames {
+		DEVICE = 0,
+		MOUNTPT,
+		FSTYPE,
+		OPTIONS,
+		_FIELDNAME_MAX
+	};
+	static uint64_t default_size = 0;
+	const char proc_mounts[] = "/proc/mounts";
+	const char hugetlbfs_str[] = "hugetlbfs";
+	const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
+	const char pagesize_opt[] = "pagesize=";
+	const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
+	const char split_tok = ' ';
+	char *splitstr[_FIELDNAME_MAX];
+	char buf[BUFSIZ];
+	char *retval = NULL;
+
+	FILE *fd = fopen(proc_mounts, "r");
+	if (fd == NULL)
+		rte_panic("Cannot open %s\n", proc_mounts);
+
+	if (default_size == 0)
+		default_size = get_default_hp_size();
+
+	while (fgets(buf, sizeof(buf), fd)){
+		if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
+				split_tok) != _FIELDNAME_MAX) {
+			RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
+			break; /* return NULL */
+		}
+
+		/* we have a specified --huge-dir option, only examine that dir */
+		if (internal_config.hugepage_dir != NULL &&
+				strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0)
+			continue;
+
+		if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
+			const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
+
+			/* if no explicit page size, the default page size is compared */
+			if (pagesz_str == NULL){
+				if (hugepage_sz == default_size){
+					retval = strdup(splitstr[MOUNTPT]);
+					break;
+				}
+			}
+			/* there is an explicit page size, so check it */
+			else {
+				uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
+				if (pagesz == hugepage_sz) {
+					retval = strdup(splitstr[MOUNTPT]);
+					break;
+				}
+			}
+		} /* end if strncmp hugetlbfs */
+	} /* end while fgets */
+
+	fclose(fd);
+	return retval;
+}
+
+/*
+ * Clear the hugepage directory of whatever hugepage files
+ * there are. Checks if the file is locked (i.e.
+ * if it's in use by another DPDK process).
+ */
+static int
+clear_hugedir(const char * hugedir)
+{
+	DIR *dir;
+	struct dirent *dirent;
+	int dir_fd, fd, lck_result;
+	const char filter[] = "*map_*"; /* matches hugepage files */
+
+	/* open directory */
+	dir = opendir(hugedir);
+	if (!dir) {
+		RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n",
+				hugedir);
+		goto error;
+	}
+	dir_fd = dirfd(dir);
+
+	dirent = readdir(dir);
+	if (!dirent) {
+		RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n",
+				hugedir);
+		goto error;
+	}
+
+	while(dirent != NULL){
+		/* skip files that don't match the hugepage pattern */
+		if (fnmatch(filter, dirent->d_name, 0) > 0) {
+			dirent = readdir(dir);
+			continue;
+		}
+
+		/* try and lock the file */
+		fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+		/* skip to next file */
+		if (fd == -1) {
+			dirent = readdir(dir);
+			continue;
+		}
+
+		/* non-blocking lock */
+		lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+		/* if lock succeeds, unlock and remove the file */
+		if (lck_result != -1) {
+			flock(fd, LOCK_UN);
+			unlinkat(dir_fd, dirent->d_name, 0);
+		}
+		close (fd);
+		dirent = readdir(dir);
+	}
+
+	closedir(dir);
+	return 0;
+
+error:
+	if (dir)
+		closedir(dir);
+
+	RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n",
+		strerror(errno));
+
+	return -1;
+}
+
+static int
+compare_hpi(const void *a, const void *b)
+{
+	const struct hugepage_info *hpi_a = a;
+	const struct hugepage_info *hpi_b = b;
+
+	return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
+}
+
+/*
+ * when we initialize the hugepage info, everything goes
+ * to socket 0 by default. it will later get sorted by memory
+ * initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+	const char dirent_start_text[] = "hugepages-";
+	const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
+	unsigned i, num_sizes = 0;
+	DIR *dir;
+	struct dirent *dirent;
+
+	dir = opendir(sys_dir_path);
+	if (dir == NULL)
+		rte_panic("Cannot open directory %s to read system hugepage "
+			  "info\n", sys_dir_path);
+
+	for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
+		struct hugepage_info *hpi;
+
+		if (strncmp(dirent->d_name, dirent_start_text,
+			    dirent_start_len) != 0)
+			continue;
+
+		if (num_sizes >= MAX_HUGEPAGE_SIZES)
+			break;
+
+		hpi = &internal_config.hugepage_info[num_sizes];
+		hpi->hugepage_sz =
+			rte_str_to_size(&dirent->d_name[dirent_start_len]);
+		hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
+
+		/* first, check if we have a mountpoint */
+		if (hpi->hugedir == NULL) {
+			uint32_t num_pages;
+
+			num_pages = get_num_hugepages(dirent->d_name);
+			if (num_pages > 0)
+				RTE_LOG(NOTICE, EAL,
+					"%" PRIu32 " hugepages of size "
+					"%" PRIu64 " reserved, but no mounted "
+					"hugetlbfs found for that size\n",
+					num_pages, hpi->hugepage_sz);
+			continue;
+		}
+
+		/* try to obtain a writelock */
+		hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
+
+		/* if blocking lock failed */
+		if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
+			RTE_LOG(CRIT, EAL,
+				"Failed to lock hugepage directory!\n");
+			break;
+		}
+		/* clear out the hugepages dir from unused pages */
+		if (clear_hugedir(hpi->hugedir) == -1)
+			break;
+
+		/* for now, put all pages into socket 0,
+		 * later they will be sorted */
+		hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+		/* for 32-bit systems, limit number of hugepages to
+		 * 1GB per page size */
+		hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+					    RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+
+		num_sizes++;
+	}
+	closedir(dir);
+
+	/* something went wrong, and we broke from the for loop above */
+	if (dirent != NULL)
+		return -1;
+
+	internal_config.num_hugepage_sizes = num_sizes;
+
+	/* sort the page directory entries by size, largest to smallest */
+	qsort(&internal_config.hugepage_info[0], num_sizes,
+	      sizeof(internal_config.hugepage_info[0]), compare_hpi);
+
+	/* now we have all info, check we have at least one valid size */
+	for (i = 0; i < num_sizes; i++)
+		if (internal_config.hugepage_info[i].hugedir != NULL &&
+		    internal_config.hugepage_info[i].num_pages[0] > 0)
+			return 0;
+
+	/* no valid hugepage mounts available, return error */
+	return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
new file mode 100644
index 00000000..06b26a9e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -0,0 +1,1224 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sys/epoll.h>
+#include <sys/signalfd.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <assert.h>
+
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_pci.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+
+#include "eal_private.h"
+#include "eal_vfio.h"
+#include "eal_thread.h"
+
+#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
+#define NB_OTHER_INTR               1
+
+static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
+
+/**
+ * union for pipe fds.
+ */
+union intr_pipefds{
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+	int uio_intr_count;              /* for uio device */
+#ifdef VFIO_PRESENT
+	uint64_t vfio_intr_count;        /* for vfio device */
+#endif
+	uint64_t timerfd_num;            /* for timerfd */
+	char charbuf[16];                /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+	TAILQ_ENTRY(rte_intr_callback) next;
+	rte_intr_callback_fn cb_fn;  /**< callback address */
+	void *cb_arg;                /**< parameter for callback */
+};
+
+struct rte_intr_source {
+	TAILQ_ENTRY(rte_intr_source) next;
+	struct rte_intr_handle intr_handle; /**< interrupt handle */
+	struct rte_intr_cb_list callbacks;  /**< user callbacks */
+	uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* union buffer for pipe read/write */
+static union intr_pipefds intr_pipe;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+/* VFIO interrupts */
+#ifdef VFIO_PRESENT
+
+#define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
+/* irq set buffer length for queue interrupts and LSC interrupt */
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+			      sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
+
+/* enable legacy (INTx) interrupts */
+static int
+vfio_enable_intx(struct rte_intr_handle *intr_handle) {
+	struct vfio_irq_set *irq_set;
+	char irq_set_buf[IRQ_SET_BUF_LEN];
+	int len, ret;
+	int *fd_ptr;
+
+	len = sizeof(irq_set_buf);
+
+	/* enable INTx */
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	irq_set->count = 1;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+	irq_set->start = 0;
+	fd_ptr = (int *) &irq_set->data;
+	*fd_ptr = intr_handle->fd;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
+						intr_handle->fd);
+		return -1;
+	}
+
+	/* unmask INTx after enabling */
+	memset(irq_set, 0, len);
+	len = sizeof(struct vfio_irq_set);
+	irq_set->argsz = len;
+	irq_set->count = 1;
+	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+	irq_set->start = 0;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+						intr_handle->fd);
+		return -1;
+	}
+	return 0;
+}
+
+/* disable legacy (INTx) interrupts */
+static int
+vfio_disable_intx(struct rte_intr_handle *intr_handle) {
+	struct vfio_irq_set *irq_set;
+	char irq_set_buf[IRQ_SET_BUF_LEN];
+	int len, ret;
+
+	len = sizeof(struct vfio_irq_set);
+
+	/* mask interrupts before disabling */
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	irq_set->count = 1;
+	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+	irq_set->start = 0;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+						intr_handle->fd);
+		return -1;
+	}
+
+	/* disable INTx*/
+	memset(irq_set, 0, len);
+	irq_set->argsz = len;
+	irq_set->count = 0;
+	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+	irq_set->start = 0;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL,
+			"Error disabling INTx interrupts for fd %d\n", intr_handle->fd);
+		return -1;
+	}
+	return 0;
+}
+
+/* enable MSI interrupts */
+static int
+vfio_enable_msi(struct rte_intr_handle *intr_handle) {
+	int len, ret;
+	char irq_set_buf[IRQ_SET_BUF_LEN];
+	struct vfio_irq_set *irq_set;
+	int *fd_ptr;
+
+	len = sizeof(irq_set_buf);
+
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	irq_set->count = 1;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+	irq_set->start = 0;
+	fd_ptr = (int *) &irq_set->data;
+	*fd_ptr = intr_handle->fd;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
+						intr_handle->fd);
+		return -1;
+	}
+	return 0;
+}
+
+/* disable MSI interrupts */
+static int
+vfio_disable_msi(struct rte_intr_handle *intr_handle) {
+	struct vfio_irq_set *irq_set;
+	char irq_set_buf[IRQ_SET_BUF_LEN];
+	int len, ret;
+
+	len = sizeof(struct vfio_irq_set);
+
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	irq_set->count = 0;
+	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+	irq_set->start = 0;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret)
+		RTE_LOG(ERR, EAL,
+			"Error disabling MSI interrupts for fd %d\n", intr_handle->fd);
+
+	return ret;
+}
+
+/* enable MSI-X interrupts */
+static int
+vfio_enable_msix(struct rte_intr_handle *intr_handle) {
+	int len, ret;
+	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+	struct vfio_irq_set *irq_set;
+	int *fd_ptr;
+
+	len = sizeof(irq_set_buf);
+
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	if (!intr_handle->max_intr)
+		intr_handle->max_intr = 1;
+	else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
+		intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
+
+	irq_set->count = intr_handle->max_intr;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = 0;
+	fd_ptr = (int *) &irq_set->data;
+	/* INTR vector offset 0 reserve for non-efds mapping */
+	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
+	memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
+		sizeof(*intr_handle->efds) * intr_handle->nb_efd);
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
+						intr_handle->fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* disable MSI-X interrupts */
+static int
+vfio_disable_msix(struct rte_intr_handle *intr_handle) {
+	struct vfio_irq_set *irq_set;
+	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+	int len, ret;
+
+	len = sizeof(struct vfio_irq_set);
+
+	irq_set = (struct vfio_irq_set *) irq_set_buf;
+	irq_set->argsz = len;
+	irq_set->count = 0;
+	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = 0;
+
+	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+	if (ret)
+		RTE_LOG(ERR, EAL,
+			"Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd);
+
+	return ret;
+}
+#endif
+
+static int
+uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
+{
+	unsigned char command_high;
+
+	/* use UIO config file descriptor for uio_pci_generic */
+	if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+		RTE_LOG(ERR, EAL,
+			"Error reading interrupts status for fd %d\n",
+			intr_handle->uio_cfg_fd);
+		return -1;
+	}
+	/* disable interrupts */
+	command_high |= 0x4;
+	if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+		RTE_LOG(ERR, EAL,
+			"Error disabling interrupts for fd %d\n",
+			intr_handle->uio_cfg_fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+uio_intx_intr_enable(struct rte_intr_handle *intr_handle)
+{
+	unsigned char command_high;
+
+	/* use UIO config file descriptor for uio_pci_generic */
+	if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+		RTE_LOG(ERR, EAL,
+			"Error reading interrupts status for fd %d\n",
+			intr_handle->uio_cfg_fd);
+		return -1;
+	}
+	/* enable interrupts */
+	command_high &= ~0x4;
+	if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+		RTE_LOG(ERR, EAL,
+			"Error enabling interrupts for fd %d\n",
+			intr_handle->uio_cfg_fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+uio_intr_disable(struct rte_intr_handle *intr_handle)
+{
+	const int value = 0;
+
+	if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+		RTE_LOG(ERR, EAL,
+			"Error disabling interrupts for fd %d (%s)\n",
+			intr_handle->fd, strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+static int
+uio_intr_enable(struct rte_intr_handle *intr_handle)
+{
+	const int value = 1;
+
+	if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+		RTE_LOG(ERR, EAL,
+			"Error enabling interrupts for fd %d (%s)\n",
+			intr_handle->fd, strerror(errno));
+		return -1;
+	}
+	return 0;
+}
+
+int
+rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+			rte_intr_callback_fn cb, void *cb_arg)
+{
+	int ret, wake_thread;
+	struct rte_intr_source *src;
+	struct rte_intr_callback *callback;
+
+	wake_thread = 0;
+
+	/* first do parameter checking */
+	if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+		RTE_LOG(ERR, EAL,
+			"Registering with invalid input parameter\n");
+		return -EINVAL;
+	}
+
+	/* allocate a new interrupt callback entity */
+	callback = rte_zmalloc("interrupt callback list",
+				sizeof(*callback), 0);
+	if (callback == NULL) {
+		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+		return -ENOMEM;
+	}
+	callback->cb_fn = cb;
+	callback->cb_arg = cb_arg;
+
+	rte_spinlock_lock(&intr_lock);
+
+	/* check if there is at least one callback registered for the fd */
+	TAILQ_FOREACH(src, &intr_sources, next) {
+		if (src->intr_handle.fd == intr_handle->fd) {
+			/* we had no interrupts for this */
+			if TAILQ_EMPTY(&src->callbacks)
+				wake_thread = 1;
+
+			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+			ret = 0;
+			break;
+		}
+	}
+
+	/* no existing callbacks for this - add new source */
+	if (src == NULL) {
+		if ((src = rte_zmalloc("interrupt source list",
+				sizeof(*src), 0)) == NULL) {
+			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+			rte_free(callback);
+			ret = -ENOMEM;
+		} else {
+			src->intr_handle = *intr_handle;
+			TAILQ_INIT(&src->callbacks);
+			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+			TAILQ_INSERT_TAIL(&intr_sources, src, next);
+			wake_thread = 1;
+			ret = 0;
+		}
+	}
+
+	rte_spinlock_unlock(&intr_lock);
+
+	/**
+	 * check if need to notify the pipe fd waited by epoll_wait to
+	 * rebuild the wait list.
+	 */
+	if (wake_thread)
+		if (write(intr_pipe.writefd, "1", 1) < 0)
+			return -EPIPE;
+
+	return ret;
+}
+
+int
+rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+			rte_intr_callback_fn cb_fn, void *cb_arg)
+{
+	int ret;
+	struct rte_intr_source *src;
+	struct rte_intr_callback *cb, *next;
+
+	/* do parameter checking first */
+	if (intr_handle == NULL || intr_handle->fd < 0) {
+		RTE_LOG(ERR, EAL,
+		"Unregistering with invalid input parameter\n");
+		return -EINVAL;
+	}
+
+	rte_spinlock_lock(&intr_lock);
+
+	/* check if the insterrupt source for the fd is existent */
+	TAILQ_FOREACH(src, &intr_sources, next)
+		if (src->intr_handle.fd == intr_handle->fd)
+			break;
+
+	/* No interrupt source registered for the fd */
+	if (src == NULL) {
+		ret = -ENOENT;
+
+	/* interrupt source has some active callbacks right now. */
+	} else if (src->active != 0) {
+		ret = -EAGAIN;
+
+	/* ok to remove. */
+	} else {
+		ret = 0;
+
+		/*walk through the callbacks and remove all that match. */
+		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+
+			next = TAILQ_NEXT(cb, next);
+
+			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+					cb->cb_arg == cb_arg)) {
+				TAILQ_REMOVE(&src->callbacks, cb, next);
+				rte_free(cb);
+				ret++;
+			}
+		}
+
+		/* all callbacks for that source are removed. */
+		if (TAILQ_EMPTY(&src->callbacks)) {
+			TAILQ_REMOVE(&intr_sources, src, next);
+			rte_free(src);
+		}
+	}
+
+	rte_spinlock_unlock(&intr_lock);
+
+	/* notify the pipe fd waited by epoll_wait to rebuild the wait list */
+	if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
+		ret = -EPIPE;
+	}
+
+	return ret;
+}
+
+int
+rte_intr_enable(struct rte_intr_handle *intr_handle)
+{
+	if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+		return -1;
+
+	switch (intr_handle->type){
+	/* write to the uio fd to enable the interrupt */
+	case RTE_INTR_HANDLE_UIO:
+		if (uio_intr_enable(intr_handle))
+			return -1;
+		break;
+	case RTE_INTR_HANDLE_UIO_INTX:
+		if (uio_intx_intr_enable(intr_handle))
+			return -1;
+		break;
+	/* not used at this moment */
+	case RTE_INTR_HANDLE_ALARM:
+		return -1;
+#ifdef VFIO_PRESENT
+	case RTE_INTR_HANDLE_VFIO_MSIX:
+		if (vfio_enable_msix(intr_handle))
+			return -1;
+		break;
+	case RTE_INTR_HANDLE_VFIO_MSI:
+		if (vfio_enable_msi(intr_handle))
+			return -1;
+		break;
+	case RTE_INTR_HANDLE_VFIO_LEGACY:
+		if (vfio_enable_intx(intr_handle))
+			return -1;
+		break;
+#endif
+	/* unknown handle type */
+	default:
+		RTE_LOG(ERR, EAL,
+			"Unknown handle type of fd %d\n",
+					intr_handle->fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+rte_intr_disable(struct rte_intr_handle *intr_handle)
+{
+	if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+		return -1;
+
+	switch (intr_handle->type){
+	/* write to the uio fd to disable the interrupt */
+	case RTE_INTR_HANDLE_UIO:
+		if (uio_intr_disable(intr_handle))
+			return -1;
+		break;
+	case RTE_INTR_HANDLE_UIO_INTX:
+		if (uio_intx_intr_disable(intr_handle))
+			return -1;
+		break;
+	/* not used at this moment */
+	case RTE_INTR_HANDLE_ALARM:
+		return -1;
+#ifdef VFIO_PRESENT
+	case RTE_INTR_HANDLE_VFIO_MSIX:
+		if (vfio_disable_msix(intr_handle))
+			return -1;
+		break;
+	case RTE_INTR_HANDLE_VFIO_MSI:
+		if (vfio_disable_msi(intr_handle))
+			return -1;
+		break;
+	case RTE_INTR_HANDLE_VFIO_LEGACY:
+		if (vfio_disable_intx(intr_handle))
+			return -1;
+		break;
+#endif
+	/* unknown handle type */
+	default:
+		RTE_LOG(ERR, EAL,
+			"Unknown handle type of fd %d\n",
+					intr_handle->fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+eal_intr_process_interrupts(struct epoll_event *events, int nfds)
+{
+	int n, bytes_read;
+	struct rte_intr_source *src;
+	struct rte_intr_callback *cb;
+	union rte_intr_read_buffer buf;
+	struct rte_intr_callback active_cb;
+
+	for (n = 0; n < nfds; n++) {
+
+		/**
+		 * if the pipe fd is ready to read, return out to
+		 * rebuild the wait list.
+		 */
+		if (events[n].data.fd == intr_pipe.readfd){
+			int r = read(intr_pipe.readfd, buf.charbuf,
+					sizeof(buf.charbuf));
+			RTE_SET_USED(r);
+			return -1;
+		}
+		rte_spinlock_lock(&intr_lock);
+		TAILQ_FOREACH(src, &intr_sources, next)
+			if (src->intr_handle.fd ==
+					events[n].data.fd)
+				break;
+		if (src == NULL){
+			rte_spinlock_unlock(&intr_lock);
+			continue;
+		}
+
+		/* mark this interrupt source as active and release the lock. */
+		src->active = 1;
+		rte_spinlock_unlock(&intr_lock);
+
+		/* set the length to be read dor different handle type */
+		switch (src->intr_handle.type) {
+		case RTE_INTR_HANDLE_UIO:
+		case RTE_INTR_HANDLE_UIO_INTX:
+			bytes_read = sizeof(buf.uio_intr_count);
+			break;
+		case RTE_INTR_HANDLE_ALARM:
+			bytes_read = sizeof(buf.timerfd_num);
+			break;
+#ifdef VFIO_PRESENT
+		case RTE_INTR_HANDLE_VFIO_MSIX:
+		case RTE_INTR_HANDLE_VFIO_MSI:
+		case RTE_INTR_HANDLE_VFIO_LEGACY:
+			bytes_read = sizeof(buf.vfio_intr_count);
+			break;
+#endif
+		case RTE_INTR_HANDLE_EXT:
+		default:
+			bytes_read = 1;
+			break;
+		}
+
+		if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) {
+			/**
+			 * read out to clear the ready-to-be-read flag
+			 * for epoll_wait.
+			 */
+			bytes_read = read(events[n].data.fd, &buf, bytes_read);
+			if (bytes_read < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK)
+					continue;
+
+				RTE_LOG(ERR, EAL, "Error reading from file "
+					"descriptor %d: %s\n",
+					events[n].data.fd,
+					strerror(errno));
+			} else if (bytes_read == 0)
+				RTE_LOG(ERR, EAL, "Read nothing from file "
+					"descriptor %d\n", events[n].data.fd);
+		}
+
+		/* grab a lock, again to call callbacks and update status. */
+		rte_spinlock_lock(&intr_lock);
+
+		if (bytes_read > 0) {
+
+			/* Finally, call all callbacks. */
+			TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+				/* make a copy and unlock. */
+				active_cb = *cb;
+				rte_spinlock_unlock(&intr_lock);
+
+				/* call the actual callback */
+				active_cb.cb_fn(&src->intr_handle,
+					active_cb.cb_arg);
+
+				/*get the lock back. */
+				rte_spinlock_lock(&intr_lock);
+			}
+		}
+
+		/* we done with that interrupt source, release it. */
+		src->active = 0;
+		rte_spinlock_unlock(&intr_lock);
+	}
+
+	return 0;
+}
+
+/**
+ * It handles all the interrupts.
+ *
+ * @param pfd
+ *  epoll file descriptor.
+ * @param totalfds
+ *  The number of file descriptors added in epoll.
+ *
+ * @return
+ *  void
+ */
+static void
+eal_intr_handle_interrupts(int pfd, unsigned totalfds)
+{
+	struct epoll_event events[totalfds];
+	int nfds = 0;
+
+	for(;;) {
+		nfds = epoll_wait(pfd, events, totalfds,
+			EAL_INTR_EPOLL_WAIT_FOREVER);
+		/* epoll_wait fail */
+		if (nfds < 0) {
+			if (errno == EINTR)
+				continue;
+			RTE_LOG(ERR, EAL,
+				"epoll_wait returns with fail\n");
+			return;
+		}
+		/* epoll_wait timeout, will never happens here */
+		else if (nfds == 0)
+			continue;
+		/* epoll_wait has at least one fd ready to read */
+		if (eal_intr_process_interrupts(events, nfds) < 0)
+			return;
+	}
+}
+
+/**
+ * It builds/rebuilds up the epoll file descriptor with all the
+ * file descriptors being waited on. Then handles the interrupts.
+ *
+ * @param arg
+ *  pointer. (unused)
+ *
+ * @return
+ *  never return;
+ */
+static __attribute__((noreturn)) void *
+eal_intr_thread_main(__rte_unused void *arg)
+{
+	struct epoll_event ev;
+
+	/* host thread, never break out */
+	for (;;) {
+		/* build up the epoll fd with all descriptors we are to
+		 * wait on then pass it to the handle_interrupts function
+		 */
+		static struct epoll_event pipe_event = {
+			.events = EPOLLIN | EPOLLPRI,
+		};
+		struct rte_intr_source *src;
+		unsigned numfds = 0;
+
+		/* create epoll fd */
+		int pfd = epoll_create(1);
+		if (pfd < 0)
+			rte_panic("Cannot create epoll instance\n");
+
+		pipe_event.data.fd = intr_pipe.readfd;
+		/**
+		 * add pipe fd into wait list, this pipe is used to
+		 * rebuild the wait list.
+		 */
+		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
+						&pipe_event) < 0) {
+			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
+					intr_pipe.readfd, strerror(errno));
+		}
+		numfds++;
+
+		rte_spinlock_lock(&intr_lock);
+
+		TAILQ_FOREACH(src, &intr_sources, next) {
+			if (src->callbacks.tqh_first == NULL)
+				continue; /* skip those with no callbacks */
+			ev.events = EPOLLIN | EPOLLPRI;
+			ev.data.fd = src->intr_handle.fd;
+
+			/**
+			 * add all the uio device file descriptor
+			 * into wait list.
+			 */
+			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
+					src->intr_handle.fd, &ev) < 0){
+				rte_panic("Error adding fd %d epoll_ctl, %s\n",
+					src->intr_handle.fd, strerror(errno));
+			}
+			else
+				numfds++;
+		}
+		rte_spinlock_unlock(&intr_lock);
+		/* serve the interrupt */
+		eal_intr_handle_interrupts(pfd, numfds);
+
+		/**
+		 * when we return, we need to rebuild the
+		 * list of fds to monitor.
+		 */
+		close(pfd);
+	}
+}
+
+int
+rte_eal_intr_init(void)
+{
+	int ret = 0, ret_1 = 0;
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	/* init the global interrupt source head */
+	TAILQ_INIT(&intr_sources);
+
+	/**
+	 * create a pipe which will be waited by epoll and notified to
+	 * rebuild the wait list of epoll.
+	 */
+	if (pipe(intr_pipe.pipefd) < 0)
+		return -1;
+
+	/* create the host thread to wait/handle the interrupt */
+	ret = pthread_create(&intr_thread, NULL,
+			eal_intr_thread_main, NULL);
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL,
+			"Failed to create thread for interrupt handling\n");
+	} else {
+		/* Set thread_name for aid in debugging. */
+		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+			"eal-intr-thread");
+		ret_1 = rte_thread_setname(intr_thread, thread_name);
+		if (ret_1 != 0)
+			RTE_LOG(ERR, EAL,
+			"Failed to set thread name for interrupt handling\n");
+	}
+
+	return -ret;
+}
+
+static void
+eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
+{
+	union rte_intr_read_buffer buf;
+	int bytes_read = 1;
+	int nbytes;
+
+	switch (intr_handle->type) {
+	case RTE_INTR_HANDLE_UIO:
+	case RTE_INTR_HANDLE_UIO_INTX:
+		bytes_read = sizeof(buf.uio_intr_count);
+		break;
+#ifdef VFIO_PRESENT
+	case RTE_INTR_HANDLE_VFIO_MSIX:
+	case RTE_INTR_HANDLE_VFIO_MSI:
+	case RTE_INTR_HANDLE_VFIO_LEGACY:
+		bytes_read = sizeof(buf.vfio_intr_count);
+		break;
+#endif
+	default:
+		bytes_read = 1;
+		RTE_LOG(INFO, EAL, "unexpected intr type\n");
+		break;
+	}
+
+	/**
+	 * read out to clear the ready-to-be-read flag
+	 * for epoll_wait.
+	 */
+	do {
+		nbytes = read(fd, &buf, bytes_read);
+		if (nbytes < 0) {
+			if (errno == EINTR || errno == EWOULDBLOCK ||
+			    errno == EAGAIN)
+				continue;
+			RTE_LOG(ERR, EAL,
+				"Error reading from fd %d: %s\n",
+				fd, strerror(errno));
+		} else if (nbytes == 0)
+			RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
+		return;
+	} while (1);
+}
+
+static int
+eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
+			struct rte_epoll_event *events)
+{
+	unsigned int i, count = 0;
+	struct rte_epoll_event *rev;
+
+	for (i = 0; i < n; i++) {
+		rev = evs[i].data.ptr;
+		if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID,
+						 RTE_EPOLL_EXEC))
+			continue;
+
+		events[count].status        = RTE_EPOLL_VALID;
+		events[count].fd            = rev->fd;
+		events[count].epfd          = rev->epfd;
+		events[count].epdata.event  = rev->epdata.event;
+		events[count].epdata.data   = rev->epdata.data;
+		if (rev->epdata.cb_fun)
+			rev->epdata.cb_fun(rev->fd,
+					   rev->epdata.cb_arg);
+
+		rte_compiler_barrier();
+		rev->status = RTE_EPOLL_VALID;
+		count++;
+	}
+	return count;
+}
+
+static inline int
+eal_init_tls_epfd(void)
+{
+	int pfd = epoll_create(255);
+
+	if (pfd < 0) {
+		RTE_LOG(ERR, EAL,
+			"Cannot create epoll instance\n");
+		return -1;
+	}
+	return pfd;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+	if (RTE_PER_LCORE(_epfd) == -1)
+		RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();
+
+	return RTE_PER_LCORE(_epfd);
+}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+	       int maxevents, int timeout)
+{
+	struct epoll_event evs[maxevents];
+	int rc;
+
+	if (!events) {
+		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+		return -1;
+	}
+
+	/* using per thread epoll fd */
+	if (epfd == RTE_EPOLL_PER_THREAD)
+		epfd = rte_intr_tls_epfd();
+
+	while (1) {
+		rc = epoll_wait(epfd, evs, maxevents, timeout);
+		if (likely(rc > 0)) {
+			/* epoll_wait has at least one fd ready to read */
+			rc = eal_epoll_process_event(evs, rc, events);
+			break;
+		} else if (rc < 0) {
+			if (errno == EINTR)
+				continue;
+			/* epoll_wait fail */
+			RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
+				strerror(errno));
+			rc = -1;
+			break;
+		} else {
+			/* rc == 0, epoll_wait timed out */
+			break;
+		}
+	}
+
+	return rc;
+}
+
+static inline void
+eal_epoll_data_safe_free(struct rte_epoll_event *ev)
+{
+	while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID,
+				    RTE_EPOLL_INVALID))
+		while (ev->status != RTE_EPOLL_VALID)
+			rte_pause();
+	memset(&ev->epdata, 0, sizeof(ev->epdata));
+	ev->fd = -1;
+	ev->epfd = -1;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+	      struct rte_epoll_event *event)
+{
+	struct epoll_event ev;
+
+	if (!event) {
+		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+		return -1;
+	}
+
+	/* using per thread epoll fd */
+	if (epfd == RTE_EPOLL_PER_THREAD)
+		epfd = rte_intr_tls_epfd();
+
+	if (op == EPOLL_CTL_ADD) {
+		event->status = RTE_EPOLL_VALID;
+		event->fd = fd;  /* ignore fd in event */
+		event->epfd = epfd;
+		ev.data.ptr = (void *)event;
+	}
+
+	ev.events = event->epdata.event;
+	if (epoll_ctl(epfd, op, fd, &ev) < 0) {
+		RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
+			op, fd, strerror(errno));
+		if (op == EPOLL_CTL_ADD)
+			/* rollback status when CTL_ADD fail */
+			event->status = RTE_EPOLL_INVALID;
+		return -1;
+	}
+
+	if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID)
+		eal_epoll_data_safe_free(event);
+
+	return 0;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
+		int op, unsigned int vec, void *data)
+{
+	struct rte_epoll_event *rev;
+	struct rte_epoll_data *epdata;
+	int epfd_op;
+	unsigned int efd_idx;
+	int rc = 0;
+
+	efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
+		(vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;
+
+	if (!intr_handle || intr_handle->nb_efd == 0 ||
+	    efd_idx >= intr_handle->nb_efd) {
+		RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
+		return -EPERM;
+	}
+
+	switch (op) {
+	case RTE_INTR_EVENT_ADD:
+		epfd_op = EPOLL_CTL_ADD;
+		rev = &intr_handle->elist[efd_idx];
+		if (rev->status != RTE_EPOLL_INVALID) {
+			RTE_LOG(INFO, EAL, "Event already been added.\n");
+			return -EEXIST;
+		}
+
+		/* attach to intr vector fd */
+		epdata = &rev->epdata;
+		epdata->event  = EPOLLIN | EPOLLPRI | EPOLLET;
+		epdata->data   = data;
+		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
+		epdata->cb_arg = (void *)intr_handle;
+		rc = rte_epoll_ctl(epfd, epfd_op,
+				   intr_handle->efds[efd_idx], rev);
+		if (!rc)
+			RTE_LOG(DEBUG, EAL,
+				"efd %d associated with vec %d added on epfd %d"
+				"\n", rev->fd, vec, epfd);
+		else
+			rc = -EPERM;
+		break;
+	case RTE_INTR_EVENT_DEL:
+		epfd_op = EPOLL_CTL_DEL;
+		rev = &intr_handle->elist[efd_idx];
+		if (rev->status == RTE_EPOLL_INVALID) {
+			RTE_LOG(INFO, EAL, "Event does not exist.\n");
+			return -EPERM;
+		}
+
+		rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
+		if (rc)
+			rc = -EPERM;
+		break;
+	default:
+		RTE_LOG(ERR, EAL, "event op type mismatch\n");
+		rc = -EPERM;
+	}
+
+	return rc;
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+	uint32_t i;
+	int fd;
+	uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+	assert(nb_efd != 0);
+
+	if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
+		for (i = 0; i < n; i++) {
+			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+			if (fd < 0) {
+				RTE_LOG(ERR, EAL,
+					"can't setup eventfd, error %i (%s)\n",
+					errno, strerror(errno));
+				return -1;
+			}
+			intr_handle->efds[i] = fd;
+		}
+		intr_handle->nb_efd   = n;
+		intr_handle->max_intr = NB_OTHER_INTR + n;
+	} else {
+		intr_handle->efds[0]  = intr_handle->fd;
+		intr_handle->nb_efd   = RTE_MIN(nb_efd, 1U);
+		intr_handle->max_intr = NB_OTHER_INTR;
+	}
+
+	return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+	uint32_t i;
+	struct rte_epoll_event *rev;
+
+	for (i = 0; i < intr_handle->nb_efd; i++) {
+		rev = &intr_handle->elist[i];
+		if (rev->status == RTE_EPOLL_INVALID)
+			continue;
+		if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
+			/* force free if the entry valid */
+			eal_epoll_data_safe_free(rev);
+			rev->status = RTE_EPOLL_INVALID;
+		}
+	}
+
+	if (intr_handle->max_intr > intr_handle->nb_efd) {
+		for (i = 0; i < intr_handle->nb_efd; i++)
+			close(intr_handle->efds[i]);
+	}
+	intr_handle->nb_efd = 0;
+	intr_handle->max_intr = 0;
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+	return !(!intr_handle->nb_efd);
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+	if (!rte_intr_dp_is_en(intr_handle))
+		return 1;
+	else
+		return !!(intr_handle->max_intr - intr_handle->nb_efd);
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+	if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
+		return 1;
+
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
new file mode 100644
index 00000000..07aec694
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
@@ -0,0 +1,955 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_ivshmem.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+
+#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
+#define PCI_DEVICE_ID_IVSHMEM 0x1110
+
+#define IVSHMEM_MAGIC 0x0BADC0DE
+
+#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
+#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
+
+#define PHYS 0x1
+#define VIRT 0x2
+#define IOREMAP 0x4
+#define FULL (PHYS|VIRT|IOREMAP)
+
+#define METADATA_SIZE_ALIGNED \
+	(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
+#define CONTAINS(x,y)\
+	(((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+struct ivshmem_pci_device {
+	char path[PATH_MAX];
+	phys_addr_t ioremap_addr;
+};
+
+/* data type to store in config */
+struct ivshmem_segment {
+	struct rte_ivshmem_metadata_entry entry;
+	uint64_t align;
+	char path[PATH_MAX];
+};
+struct ivshmem_shared_config {
+	struct ivshmem_segment segment[RTE_MAX_MEMSEG];
+	uint32_t segment_idx;
+	struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
+	uint32_t pci_devs_idx;
+};
+static struct ivshmem_shared_config * ivshmem_config;
+static int memseg_idx;
+static int pagesz;
+
+/* Tailq heads to add rings to */
+TAILQ_HEAD(rte_ring_list, rte_tailq_entry);
+
+/*
+ * Utility functions
+ */
+
+static int
+is_ivshmem_device(struct rte_pci_device * dev)
+{
+	return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM
+			&& dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
+}
+
+static void *
+map_metadata(int fd, uint64_t len)
+{
+	size_t metadata_len = sizeof(struct rte_ivshmem_metadata);
+	size_t aligned_len = METADATA_SIZE_ALIGNED;
+
+	return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, len - aligned_len);
+}
+
+static void
+unmap_metadata(void * ptr)
+{
+	munmap(ptr, sizeof(struct rte_ivshmem_metadata));
+}
+
+static int
+has_ivshmem_metadata(int fd, uint64_t len)
+{
+	struct rte_ivshmem_metadata metadata;
+	void * ptr;
+
+	ptr = map_metadata(fd, len);
+
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+	unmap_metadata(ptr);
+
+	return metadata.magic_number == IVSHMEM_MAGIC;
+}
+
+static void
+remove_segment(struct ivshmem_segment * ms, int len, int idx)
+{
+	int i;
+
+	for (i = idx; i < len - 1; i++)
+		memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment));
+	memset(&ms[len-1], 0, sizeof(struct ivshmem_segment));
+}
+
+static int
+overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+	uint64_t start1, end1, start2, end2;
+	uint64_t p_start1, p_end1, p_start2, p_end2;
+	uint64_t i_start1, i_end1, i_start2, i_end2;
+	int result = 0;
+
+	/* gather virtual addresses */
+	start1 = mz1->addr_64;
+	end1 = mz1->addr_64 + mz1->len;
+	start2 = mz2->addr_64;
+	end2 = mz2->addr_64 + mz2->len;
+
+	/* gather physical addresses */
+	p_start1 = mz1->phys_addr;
+	p_end1 = mz1->phys_addr + mz1->len;
+	p_start2 = mz2->phys_addr;
+	p_end2 = mz2->phys_addr + mz2->len;
+
+	/* gather ioremap addresses */
+	i_start1 = mz1->ioremap_addr;
+	i_end1 = mz1->ioremap_addr + mz1->len;
+	i_start2 = mz2->ioremap_addr;
+	i_end2 = mz2->ioremap_addr + mz2->len;
+
+	/* check for overlap in virtual addresses */
+	if (start1 > start2 && start1 < end2)
+		result |= VIRT;
+	if (start2 >= start1 && start2 < end1)
+		result |= VIRT;
+
+	/* check for overlap in physical addresses */
+	if (p_start1 > p_start2 && p_start1 < p_end2)
+		result |= PHYS;
+	if (p_start2 > p_start1 && p_start2 < p_end1)
+		result |= PHYS;
+
+	/* check for overlap in ioremap addresses */
+	if (i_start1 > i_start2 && i_start1 < i_end2)
+		result |= IOREMAP;
+	if (i_start2 > i_start1 && i_start2 < i_end1)
+		result |= IOREMAP;
+
+	return result;
+}
+
+static int
+adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+	uint64_t start1, end1, start2, end2;
+	uint64_t p_start1, p_end1, p_start2, p_end2;
+	uint64_t i_start1, i_end1, i_start2, i_end2;
+	int result = 0;
+
+	/* gather virtual addresses */
+	start1 = mz1->addr_64;
+	end1 = mz1->addr_64 + mz1->len;
+	start2 = mz2->addr_64;
+	end2 = mz2->addr_64 + mz2->len;
+
+	/* gather physical addresses */
+	p_start1 = mz1->phys_addr;
+	p_end1 = mz1->phys_addr + mz1->len;
+	p_start2 = mz2->phys_addr;
+	p_end2 = mz2->phys_addr + mz2->len;
+
+	/* gather ioremap addresses */
+	i_start1 = mz1->ioremap_addr;
+	i_end1 = mz1->ioremap_addr + mz1->len;
+	i_start2 = mz2->ioremap_addr;
+	i_end2 = mz2->ioremap_addr + mz2->len;
+
+	/* check if segments are virtually adjacent */
+	if (start1 == end2)
+		result |= VIRT;
+	if (start2 == end1)
+		result |= VIRT;
+
+	/* check if segments are physically adjacent */
+	if (p_start1 == p_end2)
+		result |= PHYS;
+	if (p_start2 == p_end1)
+		result |= PHYS;
+
+	/* check if segments are ioremap-adjacent */
+	if (i_start1 == i_end2)
+		result |= IOREMAP;
+	if (i_start2 == i_end1)
+		result |= IOREMAP;
+
+	return result;
+}
+
+static int
+has_adjacent_segments(struct ivshmem_segment * ms, int len)
+{
+	int i, j;
+
+	for (i = 0; i < len; i++)
+		for (j = i + 1; j < len; j++) {
+			/* we're only interested in fully adjacent segments; partially
+			 * adjacent segments can coexist.
+			 */
+			if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL)
+				return 1;
+		}
+	return 0;
+}
+
+static int
+has_overlapping_segments(struct ivshmem_segment * ms, int len)
+{
+	int i, j;
+
+	for (i = 0; i < len; i++)
+		for (j = i + 1; j < len; j++)
+			if (overlap(&ms[i].entry.mz, &ms[j].entry.mz))
+				return 1;
+	return 0;
+}
+
+static int
+seg_compare(const void * a, const void * b)
+{
+	const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
+	const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
+
+	/* move unallocated zones to the end */
+	if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
+		return 0;
+	if (s1->entry.mz.addr == 0)
+		return 1;
+	if (s2->entry.mz.addr == 0)
+		return -1;
+
+	return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr;
+}
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+static void
+entry_dump(struct rte_ivshmem_metadata_entry *e)
+{
+	RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
+			RTE_PTR_ADD(e->mz.addr, e->mz.len));
+	RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+			e->mz.phys_addr,
+			e->mz.phys_addr + e->mz.len);
+	RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+			e->mz.ioremap_addr,
+			e->mz.ioremap_addr + e->mz.len);
+	RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len);
+	RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
+}
+#endif
+
+
+
+/*
+ * Actual useful code
+ */
+
+/* read through metadata mapped from the IVSHMEM device */
+static int
+read_metadata(char * path, int path_len, int fd, uint64_t flen)
+{
+	struct rte_ivshmem_metadata metadata;
+	struct rte_ivshmem_metadata_entry * entry;
+	int idx, i;
+	void * ptr;
+
+	ptr = map_metadata(fd, flen);
+
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+	unmap_metadata(ptr);
+
+	RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
+
+	idx = ivshmem_config->segment_idx;
+
+	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES &&
+		idx <= RTE_MAX_MEMSEG; i++) {
+
+		if (idx == RTE_MAX_MEMSEG) {
+			RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
+			return -1;
+		}
+
+		entry = &metadata.entry[i];
+
+		/* stop on uninitialized memzone */
+		if (entry->mz.len == 0)
+			break;
+
+		/* copy metadata entry */
+		memcpy(&ivshmem_config->segment[idx].entry, entry,
+				sizeof(struct rte_ivshmem_metadata_entry));
+
+		/* copy path */
+		snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);
+
+		idx++;
+	}
+	ivshmem_config->segment_idx = idx;
+
+	return 0;
+}
+
+/* check through each segment and look for adjacent or overlapping ones. */
+static int
+cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
+{
+	struct ivshmem_segment * s, * tmp;
+	int i, j, concat, seg_adjacent, seg_overlapping;
+	uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
+
+	qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
+				seg_compare);
+
+	while (has_overlapping_segments(ms, tbl_len) ||
+			has_adjacent_segments(ms, tbl_len)) {
+
+		for (i = 0; i < tbl_len; i++) {
+			s = &ms[i];
+
+			concat = 0;
+
+			for (j = i + 1; j < tbl_len; j++) {
+				tmp = &ms[j];
+
+				/* check if this segment is overlapping with existing segment,
+				 * or is adjacent to existing segment */
+				seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
+				seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
+
+				/* check if segments fully overlap or are fully adjacent */
+				if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+					RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
+					RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+					entry_dump(&s->entry);
+					RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+					entry_dump(&tmp->entry);
+#endif
+
+					start1 = s->entry.mz.addr_64;
+					start2 = tmp->entry.mz.addr_64;
+					p_start1 = s->entry.mz.phys_addr;
+					p_start2 = tmp->entry.mz.phys_addr;
+					i_start1 = s->entry.mz.ioremap_addr;
+					i_start2 = tmp->entry.mz.ioremap_addr;
+					end1 = s->entry.mz.addr_64 + s->entry.mz.len;
+					end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
+
+					/* settle for minimum start address and maximum length */
+					s->entry.mz.addr_64 = RTE_MIN(start1, start2);
+					s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
+					s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
+					s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
+					s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
+					concat = 1;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+					RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
+					entry_dump(&s->entry);
+
+#endif
+				}
+				/* if segments not fully overlap, we have an error condition.
+				 * adjacent segments can coexist.
+				 */
+				else if (seg_overlapping > 0) {
+					RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+					RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+					entry_dump(&s->entry);
+					RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+					entry_dump(&tmp->entry);
+#endif
+					return -1;
+				}
+				if (concat)
+					break;
+			}
+			/* if we concatenated, remove segment at j */
+			if (concat) {
+				remove_segment(ms, tbl_len, j);
+				tbl_len--;
+				break;
+			}
+		}
+	}
+
+	return tbl_len;
+}
+
+static int
+create_shared_config(void)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	/* build ivshmem config file path */
+	snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+			internal_config.hugefile_prefix);
+
+	fd = open(path, O_CREAT | O_RDWR, 0600);
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
+		return -1;
+	}
+
+	/* try ex-locking first - if the file is locked, we have a problem */
+	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
+		RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) {
+		RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno));
+		return -1;
+	}
+
+	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (ivshmem_config == MAP_FAILED)
+		return -1;
+
+	memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
+
+	/* change the exclusive lock we got earlier to a shared lock */
+	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+		return -1;
+	}
+
+	close(fd);
+
+	return 0;
+}
+
+/* open shared config file and, if present, map the config.
+ * having no config file is not an error condition, as we later check if
+ * ivshmem_config is NULL (if it is, that means nothing was mapped). */
+static int
+open_shared_config(void)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	/* build ivshmem config file path */
+	snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+			internal_config.hugefile_prefix);
+
+	fd = open(path, O_RDONLY);
+
+	/* if the file doesn't exist, just return success */
+	if (fd < 0 && errno == ENOENT)
+		return 0;
+	/* else we have an error condition */
+	else if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
+				path, strerror(errno));
+		return -1;
+	}
+
+	/* try ex-locking first - if the lock *does* succeed, this means it's a
+	 * stray config file, so it should be deleted.
+	 */
+	if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
+
+		/* if we can't remove the file, something is wrong */
+		if (unlink(path) < 0) {
+			RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
+					strerror(errno));
+			return -1;
+		}
+
+		/* release the lock */
+		flock(fd, LOCK_UN);
+		close(fd);
+
+		/* return success as having a stray config file is equivalent to not
+		 * having config file at all.
+		 */
+		return 0;
+	}
+
+	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+			PROT_READ, MAP_SHARED, fd, 0);
+
+	if (ivshmem_config == MAP_FAILED)
+		return -1;
+
+	/* place a shared lock on config file */
+	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+		return -1;
+	}
+
+	close(fd);
+
+	return 0;
+}
+
+/*
+ * This function does the following:
+ *
+ * 1) Builds a table of ivshmem_segments with proper offset alignment
+ * 2) Cleans up that table so that we don't have any overlapping or adjacent
+ *    memory segments
+ * 3) Creates memsegs from this table and maps them into memory.
+ */
+static inline int
+map_all_segments(void)
+{
+	struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
+	struct ivshmem_pci_device * pci_dev;
+	struct rte_mem_config * mcfg;
+	struct ivshmem_segment * seg;
+	int fd, fd_zero;
+	unsigned i, j;
+	struct rte_memzone mz;
+	struct rte_memseg ms;
+	void * base_addr;
+	uint64_t align, len;
+	phys_addr_t ioremap_addr;
+
+	ioremap_addr = 0;
+
+	memset(ms_tbl, 0, sizeof(ms_tbl));
+	memset(&mz, 0, sizeof(struct rte_memzone));
+	memset(&ms, 0, sizeof(struct rte_memseg));
+
+	/* first, build a table of memsegs to map, to avoid failed mmaps due to
+	 * overlaps
+	 */
+	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
+		if (i == RTE_MAX_MEMSEG) {
+			RTE_LOG(ERR, EAL, "Too many segments requested!\n");
+			return -1;
+		}
+
+		seg = &ivshmem_config->segment[i];
+
+		/* copy segment to table */
+		memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
+
+		/* find ioremap addr */
+		for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
+			pci_dev = &ivshmem_config->pci_devs[j];
+			if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
+				ioremap_addr = pci_dev->ioremap_addr;
+				break;
+			}
+		}
+		if (ioremap_addr == 0) {
+			RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
+			return -1;
+		}
+
+		/* work out alignments */
+		align = seg->entry.mz.addr_64 -
+				RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
+		len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
+
+		/* save original alignments */
+		ms_tbl[i].align = align;
+
+		/* create a memory zone */
+		mz.addr_64 = seg->entry.mz.addr_64 - align;
+		mz.len = len;
+		mz.hugepage_sz = seg->entry.mz.hugepage_sz;
+		mz.phys_addr = seg->entry.mz.phys_addr - align;
+
+		/* find true physical address */
+		mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
+
+		ms_tbl[i].entry.offset = seg->entry.offset - align;
+
+		memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
+	}
+
+	/* clean up the segments */
+	memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
+
+	if (memseg_idx < 0)
+		return -1;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	fd_zero = open("/dev/zero", O_RDWR);
+
+	if (fd_zero < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
+		return -1;
+	}
+
+	/* create memsegs and put them into DPDK memory */
+	for (i = 0; i < (unsigned) memseg_idx; i++) {
+
+		seg = &ms_tbl[i];
+
+		ms.addr_64 = seg->entry.mz.addr_64;
+		ms.hugepage_sz = seg->entry.mz.hugepage_sz;
+		ms.len = seg->entry.mz.len;
+		ms.nchannel = rte_memory_get_nchannel();
+		ms.nrank = rte_memory_get_nrank();
+		ms.phys_addr = seg->entry.mz.phys_addr;
+		ms.ioremap_addr = seg->entry.mz.ioremap_addr;
+		ms.socket_id = seg->entry.mz.socket_id;
+
+		base_addr = mmap(ms.addr, ms.len,
+				PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
+
+		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+			RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
+			return -1;
+		}
+
+		fd = open(seg->path, O_RDWR);
+
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
+					strerror(errno));
+			return -1;
+		}
+
+		munmap(ms.addr, ms.len);
+
+		base_addr = mmap(ms.addr, ms.len,
+				PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+				seg->entry.offset);
+
+
+		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+			RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
+					"expected %p got %p (%s)\n", ms.addr, base_addr,
+					strerror(errno));
+			return -1;
+		}
+
+		RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
+				"offset 0x%" PRIx64 "\n",
+				ms.addr, ms.len, seg->entry.offset);
+
+		/* put the pointers back into their real positions using original
+		 * alignment */
+		ms.addr_64 += seg->align;
+		ms.phys_addr += seg->align;
+		ms.ioremap_addr += seg->align;
+		ms.len -= seg->align;
+
+		/* at this point, the rest of DPDK memory is not initialized, so we
+		 * expect memsegs to be empty */
+		memcpy(&mcfg->memseg[i], &ms,
+				sizeof(struct rte_memseg));
+
+		close(fd);
+
+		RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
+				ms.len);
+	}
+
+	return 0;
+}
+
+/* this happens at a later stage, after general EAL memory initialization */
+int
+rte_eal_ivshmem_obj_init(void)
+{
+	struct rte_ring_list* ring_list = NULL;
+	struct rte_mem_config * mcfg;
+	struct ivshmem_segment * seg;
+	struct rte_memzone * mz;
+	struct rte_ring * r;
+	struct rte_tailq_entry *te;
+	unsigned i, ms, idx;
+	uint64_t offset;
+
+	/* secondary process would not need any object discovery - it'll all
+	 * already be in shared config */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
+		return 0;
+
+	/* check that we have an initialised ring tail queue */
+	ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list);
+	if (ring_list == NULL) {
+		RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* create memzones */
+	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {
+
+		seg = &ivshmem_config->segment[i];
+
+		/* add memzone */
+		if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) {
+			RTE_LOG(ERR, EAL, "No more memory zones available!\n");
+			return -1;
+		}
+
+		idx = mcfg->memzone_cnt;
+
+		RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
+				seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
+
+		memcpy(&mcfg->memzone[idx], &seg->entry.mz,
+				sizeof(struct rte_memzone));
+
+		/* find ioremap address */
+		for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
+			if (ms == RTE_MAX_MEMSEG) {
+				RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
+				return -1;
+			}
+			if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
+				offset = mcfg->memzone[idx].addr_64 -
+								mcfg->memseg[ms].addr_64;
+				mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
+						offset;
+				break;
+			}
+		}
+
+		mcfg->memzone_cnt++;
+	}
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find rings */
+	for (i = 0; i < mcfg->memzone_cnt; i++) {
+		mz = &mcfg->memzone[i];
+
+		/* check if memzone has a ring prefix */
+		if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
+				sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
+			continue;
+
+		r = (struct rte_ring*) (mz->addr_64);
+
+		te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
+		if (te == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n");
+			return -1;
+		}
+
+		te->data = (void *) r;
+
+		TAILQ_INSERT_TAIL(ring_list, te, next);
+
+		RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
+	}
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+	rte_memzone_dump(stdout);
+	rte_ring_list_dump(stdout);
+#endif
+
+	return 0;
+}
+
+/* initialize ivshmem structures */
+int rte_eal_ivshmem_init(void)
+{
+	struct rte_pci_device * dev;
+	struct rte_pci_resource * res;
+	int fd, ret;
+	char path[PATH_MAX];
+
+	/* initialize everything to 0 */
+	memset(path, 0, sizeof(path));
+	ivshmem_config = NULL;
+
+	pagesz = getpagesize();
+
+	RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+
+		if (open_shared_config() < 0) {
+			RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
+			return -1;
+		}
+	}
+	else {
+
+		TAILQ_FOREACH(dev, &pci_device_list, next) {
+
+			if (is_ivshmem_device(dev)) {
+
+				/* IVSHMEM memory is always on BAR2 */
+				res = &dev->mem_resource[2];
+
+				/* if we don't have a BAR2 */
+				if (res->len == 0)
+					continue;
+
+				/* construct pci device path */
+				snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
+						dev->addr.domain, dev->addr.bus, dev->addr.devid,
+						dev->addr.function);
+
+				/* try to find memseg */
+				fd = open(path, O_RDWR);
+				if (fd < 0) {
+					RTE_LOG(ERR, EAL, "Could not open %s\n", path);
+					return -1;
+				}
+
+				/* check if it's a DPDK IVSHMEM device */
+				ret = has_ivshmem_metadata(fd, res->len);
+
+				/* is DPDK device */
+				if (ret == 1) {
+
+					/* config file creation is deferred until the first
+					 * DPDK device is found. then, it has to be created
+					 * only once. */
+					if (ivshmem_config == NULL &&
+							create_shared_config() < 0) {
+						RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
+						close(fd);
+						return -1;
+					}
+
+					if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
+						RTE_LOG(ERR, EAL, "Could not read metadata from"
+								" device %02x:%02x.%x!\n", dev->addr.bus,
+								dev->addr.devid, dev->addr.function);
+						close(fd);
+						return -1;
+					}
+
+					if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
+						RTE_LOG(WARNING, EAL,
+								"IVSHMEM PCI device limit exceeded. Increase "
+								"CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS  in "
+								"your config file.\n");
+						break;
+					}
+
+					RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
+							dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+					ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
+					snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
+							sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
+							"%s", path);
+
+					ivshmem_config->pci_devs_idx++;
+				}
+				/* failed to read */
+				else if (ret < 0) {
+					RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
+							strerror(errno));
+					close(fd);
+					return -1;
+				}
+				/* not a DPDK device */
+				else
+					RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
+
+				/* close the BAR fd */
+				close(fd);
+			}
+		}
+	}
+
+	/* ivshmem_config is not NULL only if config was created and/or mapped */
+	if (ivshmem_config) {
+		if (map_all_segments() < 0) {
+			RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
+			return -1;
+		}
+	}
+	else {
+		RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_lcore.c b/lib/librte_eal/linuxapp/eal/eal_lcore.c
new file mode 100644
index 00000000..de5b4260
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_lcore.c
@@ -0,0 +1,110 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+#include "eal_thread.h"
+
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u"
+#define CORE_ID_FILE "topology/core_id"
+#define NUMA_NODE_PATH "/sys/devices/system/node"
+
+/* Check if a cpu is present by the presence of the cpu information for it */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+	char path[PATH_MAX];
+	int len = snprintf(path, sizeof(path), SYS_CPU_DIR
+		"/"CORE_ID_FILE, lcore_id);
+	if (len <= 0 || (unsigned)len >= sizeof(path))
+		return 0;
+	if (access(path, F_OK) != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Get CPU socket id (NUMA node) for a logical core.
+ *
+ * This searches each nodeX directories in /sys for the symlink for the given
+ * lcore_id and returns the numa node where the lcore is found. If lcore is not
+ * found on any numa node, returns zero.
+ */
+unsigned
+eal_cpu_socket_id(unsigned lcore_id)
+{
+	unsigned socket;
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+		char path[PATH_MAX];
+
+		snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH,
+				socket, lcore_id);
+		if (access(path, F_OK) == 0)
+			return socket;
+	}
+	return 0;
+}
+
+/* Get the cpu core id value from the /sys/.../cpuX core_id value */
+unsigned
+eal_cpu_core_id(unsigned lcore_id)
+{
+	char path[PATH_MAX];
+	unsigned long id;
+
+	int len = snprintf(path, sizeof(path), SYS_CPU_DIR "/%s", lcore_id, CORE_ID_FILE);
+	if (len <= 0 || (unsigned)len >= sizeof(path))
+		goto err;
+	if (eal_parse_sysfs_value(path, &id) != 0)
+		goto err;
+	return (unsigned)id;
+
+err:
+	RTE_LOG(ERR, EAL, "Error reading core id value from %s "
+			"for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id);
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
new file mode 100644
index 00000000..0b133c3e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -0,0 +1,146 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+/*
+ * default log function, used once mempool (hence log history) is
+ * available
+ */
+static ssize_t
+console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+	char copybuf[BUFSIZ + 1];
+	ssize_t ret;
+	uint32_t loglevel;
+
+	/* add this log in history */
+	rte_log_add_in_history(buf, size);
+
+	/* write on stdout */
+	ret = fwrite(buf, 1, size, stdout);
+	fflush(stdout);
+
+	/* truncate message if too big (should not happen) */
+	if (size > BUFSIZ)
+		size = BUFSIZ;
+
+	/* Syslog error levels are from 0 to 7, so subtract 1 to convert */
+	loglevel = rte_log_cur_msg_loglevel() - 1;
+	memcpy(copybuf, buf, size);
+	copybuf[size] = '\0';
+
+	/* write on syslog too */
+	syslog(loglevel, "%s", copybuf);
+
+	return ret;
+}
+
+static cookie_io_functions_t console_log_func = {
+	.write = console_log_write,
+};
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+int
+rte_eal_log_init(const char *id, int facility)
+{
+	FILE *log_stream;
+
+	log_stream = fopencookie(NULL, "w+", console_log_func);
+	if (log_stream == NULL)
+		return -1;
+
+	openlog(id, LOG_NDELAY | LOG_PID, facility);
+
+	if (rte_eal_common_log_init(log_stream) < 0)
+		return -1;
+
+	return 0;
+}
+
+/* early logs */
+
+/*
+ * early log function, used during boot when mempool (hence log
+ * history) is not available
+ */
+static ssize_t
+early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+	ssize_t ret;
+	ret = fwrite(buf, size, 1, stdout);
+	fflush(stdout);
+	if (ret == 0)
+		return -1;
+	return ret;
+}
+
+static cookie_io_functions_t early_log_func = {
+	.write = early_log_write,
+};
+static FILE *early_log_stream;
+
+/*
+ * init the log library, called by rte_eal_init() to enable early
+ * logs
+ */
+int
+rte_eal_log_early_init(void)
+{
+	early_log_stream = fopencookie(NULL, "w+", early_log_func);
+	if (early_log_stream == NULL) {
+		printf("Cannot configure early_log_stream\n");
+		return -1;
+	}
+	rte_openlog_stream(early_log_stream);
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
new file mode 100644
index 00000000..5b9132c6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -0,0 +1,1559 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*   BSD LICENSE
+ *
+ *   Copyright(c) 2013 6WIND.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define _FILE_OFFSET_BITS 64
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+int rte_xen_dom0_supported(void)
+{
+	return internal_config.xen_dom0_support;
+}
+#endif
+
+/**
+ * @file
+ * Huge page mapping under linux
+ *
+ * To reserve a big contiguous amount of memory, we use the hugepage
+ * feature of linux. For that, we need to have hugetlbfs mounted. This
+ * code will create many files in this directory (one per page) and
+ * map them in virtual memory. For each page, we will retrieve its
+ * physical address and remap it in order to have a virtual contiguous
+ * zone as well as a physical contiguous zone.
+ */
+
+static uint64_t baseaddr_offset;
+
+static unsigned proc_pagemap_readable;
+
+#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
+
+static void
+test_proc_pagemap_readable(void)
+{
+	int fd = open("/proc/self/pagemap", O_RDONLY);
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL,
+			"Cannot open /proc/self/pagemap: %s. "
+			"virt2phys address translation will not work\n",
+			strerror(errno));
+		return;
+	}
+
+	/* Is readable */
+	close(fd);
+	proc_pagemap_readable = 1;
+}
+
+/* Lock page in physical memory and prevent from swapping. */
+int
+rte_mem_lock_page(const void *virt)
+{
+	unsigned long virtual = (unsigned long)virt;
+	int page_size = getpagesize();
+	unsigned long aligned = (virtual & ~ (page_size - 1));
+	return mlock((void*)aligned, page_size);
+}
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+	int fd;
+	uint64_t page, physaddr;
+	unsigned long virt_pfn;
+	int page_size;
+	off_t offset;
+
+	/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
+	if (!proc_pagemap_readable)
+		return RTE_BAD_PHYS_ADDR;
+
+	/* standard page size */
+	page_size = getpagesize();
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+			__func__, strerror(errno));
+		return RTE_BAD_PHYS_ADDR;
+	}
+
+	virt_pfn = (unsigned long)virtaddr / page_size;
+	offset = sizeof(uint64_t) * virt_pfn;
+	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
+		RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+				__func__, strerror(errno));
+		close(fd);
+		return RTE_BAD_PHYS_ADDR;
+	}
+	if (read(fd, &page, sizeof(uint64_t)) < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+				__func__, strerror(errno));
+		close(fd);
+		return RTE_BAD_PHYS_ADDR;
+	}
+
+	/*
+	 * the pfn (page frame number) are bits 0-54 (see
+	 * pagemap.txt in linux Documentation)
+	 */
+	physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+		+ ((unsigned long)virtaddr % page_size);
+	close(fd);
+	return physaddr;
+}
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value. We find
+ * it by browsing the /proc/self/pagemap special file.
+ */
+static int
+find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned i;
+	phys_addr_t addr;
+
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va);
+		if (addr == RTE_BAD_PHYS_ADDR)
+			return -1;
+		hugepg_tbl[i].physaddr = addr;
+	}
+	return 0;
+}
+
+/*
+ * Check whether address-space layout randomization is enabled in
+ * the kernel. This is important for multi-process as it can prevent
+ * two processes mapping data to the same virtual address
+ * Returns:
+ *    0 - address space randomization disabled
+ *    1/2 - address space randomization enabled
+ *    negative error code on error
+ */
+static int
+aslr_enabled(void)
+{
+	char c;
+	int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+	retval = read(fd, &c, 1);
+	close(fd);
+	if (retval < 0)
+		return -errno;
+	if (retval == 0)
+		return -EIO;
+	switch (c) {
+		case '0' : return 0;
+		case '1' : return 1;
+		case '2' : return 2;
+		default: return -EINVAL;
+	}
+}
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by hugepage_sz until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of hugepage size.
+ */
+static void *
+get_virtual_area(size_t *size, size_t hugepage_sz)
+{
+	void *addr;
+	int fd;
+	long aligned_addr;
+
+	if (internal_config.base_virtaddr != 0) {
+		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
+				baseaddr_offset);
+	}
+	else addr = NULL;
+
+	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd < 0){
+		RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+		return NULL;
+	}
+	do {
+		addr = mmap(addr,
+				(*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (addr == MAP_FAILED)
+			*size -= hugepage_sz;
+	} while (addr == MAP_FAILED && *size > 0);
+
+	if (addr == MAP_FAILED) {
+		close(fd);
+		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+			strerror(errno));
+		return NULL;
+	}
+
+	munmap(addr, (*size) + hugepage_sz);
+	close(fd);
+
+	/* align addr to a huge page size boundary */
+	aligned_addr = (long)addr;
+	aligned_addr += (hugepage_sz - 1);
+	aligned_addr &= (~(hugepage_sz - 1));
+	addr = (void *)(aligned_addr);
+
+	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+		addr, *size);
+
+	/* increment offset */
+	baseaddr_offset += *size;
+
+	return addr;
+}
+
+/*
+ * Mmap all hugepages of hugepage table: it first open a file in
+ * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
+ * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
+ * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
+ * map continguous physical blocks in contiguous virtual blocks.
+ */
+static int
+map_all_hugepages(struct hugepage_file *hugepg_tbl,
+		struct hugepage_info *hpi, int orig)
+{
+	int fd;
+	unsigned i;
+	void *virtaddr;
+	void *vma_addr = NULL;
+	size_t vma_len = 0;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	RTE_SET_USED(vma_len);
+#endif
+
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		uint64_t hugepage_sz = hpi->hugepage_sz;
+
+		if (orig) {
+			hugepg_tbl[i].file_id = i;
+			hugepg_tbl[i].size = hugepage_sz;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+			eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
+					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+					hugepg_tbl[i].file_id);
+#else
+			eal_get_hugefile_path(hugepg_tbl[i].filepath,
+					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+					hugepg_tbl[i].file_id);
+#endif
+			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
+		}
+#ifndef RTE_ARCH_64
+		/* for 32-bit systems, don't remap 1G and 16G pages, just reuse
+		 * original map address as final map address.
+		 */
+		else if ((hugepage_sz == RTE_PGSIZE_1G)
+			|| (hugepage_sz == RTE_PGSIZE_16G)) {
+			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+			hugepg_tbl[i].orig_va = NULL;
+			continue;
+		}
+#endif
+
+#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
+		else if (vma_len == 0) {
+			unsigned j, num_pages;
+
+			/* reserve a virtual area for next contiguous
+			 * physical block: count the number of
+			 * contiguous physical pages. */
+			for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+				/* The physical addresses are sorted in
+				 * descending order on PPC64 */
+				if (hugepg_tbl[j].physaddr !=
+				    hugepg_tbl[j-1].physaddr - hugepage_sz)
+					break;
+#else
+				if (hugepg_tbl[j].physaddr !=
+				    hugepg_tbl[j-1].physaddr + hugepage_sz)
+					break;
+#endif
+			}
+			num_pages = j - i;
+			vma_len = num_pages * hugepage_sz;
+
+			/* get the biggest virtual memory area up to
+			 * vma_len. If it fails, vma_addr is NULL, so
+			 * let the kernel provide the address. */
+			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+			if (vma_addr == NULL)
+				vma_len = hugepage_sz;
+		}
+#endif
+
+		/* try to create hugepage file */
+		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
+					strerror(errno));
+			return -1;
+		}
+
+		/* map the segment, and populate page tables,
+		 * the kernel fills this segment with zeros */
+		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, fd, 0);
+		if (virtaddr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
+					strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		if (orig) {
+			hugepg_tbl[i].orig_va = virtaddr;
+		}
+		else {
+			hugepg_tbl[i].final_va = virtaddr;
+		}
+
+		/* set shared flock on the file. */
+		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+				__func__, strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		close(fd);
+
+		vma_addr = (char *)vma_addr + hugepage_sz;
+		vma_len -= hugepage_sz;
+	}
+	return 0;
+}
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+
+/*
+ * Remaps all hugepages into single file segments
+ */
+static int
+remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	int fd;
+	unsigned i = 0, j, num_pages, page_idx = 0;
+	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
+	size_t vma_len = 0;
+	size_t hugepage_sz = hpi->hugepage_sz;
+	size_t total_size, offset;
+	char filepath[MAX_HUGEPAGE_PATH];
+	phys_addr_t physaddr;
+	int socket;
+
+	while (i < hpi->num_pages[0]) {
+
+#ifndef RTE_ARCH_64
+		/* for 32-bit systems, don't remap 1G pages and 16G pages,
+		 * just reuse original map address as final map address.
+		 */
+		if ((hugepage_sz == RTE_PGSIZE_1G)
+			|| (hugepage_sz == RTE_PGSIZE_16G)) {
+			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+			hugepg_tbl[i].orig_va = NULL;
+			i++;
+			continue;
+		}
+#endif
+
+		/* reserve a virtual area for next contiguous
+		 * physical block: count the number of
+		 * contiguous physical pages. */
+		for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+			/* The physical addresses are sorted in descending
+			 * order on PPC64 */
+			if (hugepg_tbl[j].physaddr !=
+				hugepg_tbl[j-1].physaddr - hugepage_sz)
+				break;
+#else
+			if (hugepg_tbl[j].physaddr !=
+				hugepg_tbl[j-1].physaddr + hugepage_sz)
+				break;
+#endif
+		}
+		num_pages = j - i;
+		vma_len = num_pages * hugepage_sz;
+
+		socket = hugepg_tbl[i].socket_id;
+
+		/* get the biggest virtual memory area up to
+		 * vma_len. If it fails, vma_addr is NULL, so
+		 * let the kernel provide the address. */
+		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+
+		/* If we can't find a big enough virtual area, work out how many pages
+		 * we are going to get */
+		if (vma_addr == NULL)
+			j = i + 1;
+		else if (vma_len != num_pages * hugepage_sz) {
+			num_pages = vma_len / hugepage_sz;
+			j = i + num_pages;
+
+		}
+
+		hugepg_tbl[page_idx].file_id = page_idx;
+		eal_get_hugefile_path(filepath,
+				sizeof(filepath),
+				hpi->hugedir,
+				hugepg_tbl[page_idx].file_id);
+
+		/* try to create hugepage file */
+		fd = open(filepath, O_CREAT | O_RDWR, 0755);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
+			return -1;
+		}
+
+		total_size = 0;
+		for (;i < j; i++) {
+
+			/* unmap current segment */
+			if (total_size > 0)
+				munmap(vma_addr, total_size);
+
+			/* unmap original page */
+			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
+			unlink(hugepg_tbl[i].filepath);
+
+			total_size += hugepage_sz;
+
+			old_addr = vma_addr;
+
+			/* map new, bigger segment, and populate page tables,
+			 * the kernel fills this segment with zeros */
+			vma_addr = mmap(vma_addr, total_size,
+					PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0);
+
+			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
+				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
+				close(fd);
+				return -1;
+			}
+		}
+
+		/* set shared flock on the file. */
+		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+				__func__, strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
+				filepath);
+
+		physaddr = rte_mem_virt2phy(vma_addr);
+
+		if (physaddr == RTE_BAD_PHYS_ADDR)
+			return -1;
+
+		hugepg_tbl[page_idx].final_va = vma_addr;
+
+		hugepg_tbl[page_idx].physaddr = physaddr;
+
+		hugepg_tbl[page_idx].repeated = num_pages;
+
+		hugepg_tbl[page_idx].socket_id = socket;
+
+		close(fd);
+
+		/* verify the memory segment - that is, check that every VA corresponds
+		 * to the physical address we expect to see
+		 */
+		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
+			uint64_t expected_physaddr;
+
+			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
+			page_addr = RTE_PTR_ADD(vma_addr, offset);
+			physaddr = rte_mem_virt2phy(page_addr);
+
+			if (physaddr != expected_physaddr) {
+				RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
+						"at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
+						" (expected 0x%" PRIx64 ")\n",
+						page_addr, offset, physaddr, expected_physaddr);
+				return -1;
+			}
+		}
+
+		page_idx++;
+	}
+
+	/* zero out the rest */
+	memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
+	return page_idx;
+}
+#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
+
+/* Unmap all hugepages from original mapping */
+static int
+unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+        unsigned i;
+        for (i = 0; i < hpi->num_pages[0]; i++) {
+                if (hugepg_tbl[i].orig_va) {
+                        munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
+                        hugepg_tbl[i].orig_va = NULL;
+                }
+        }
+        return 0;
+}
+#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
+
+/*
+ * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
+ * page.
+ */
+static int
+find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	int socket_id;
+	char *end, *nodestr;
+	unsigned i, hp_count = 0;
+	uint64_t virt_addr;
+	char buf[BUFSIZ];
+	char hugedir_str[PATH_MAX];
+	FILE *f;
+
+	f = fopen("/proc/self/numa_maps", "r");
+	if (f == NULL) {
+		RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps,"
+				" consider that all memory is in socket_id 0\n");
+		return 0;
+	}
+
+	snprintf(hugedir_str, sizeof(hugedir_str),
+			"%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
+
+	/* parse numa map */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+
+		/* ignore non huge page */
+		if (strstr(buf, " huge ") == NULL &&
+				strstr(buf, hugedir_str) == NULL)
+			continue;
+
+		/* get zone addr */
+		virt_addr = strtoull(buf, &end, 16);
+		if (virt_addr == 0 || end == buf) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+
+		/* get node id (socket id) */
+		nodestr = strstr(buf, " N");
+		if (nodestr == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+		nodestr += 2;
+		end = strstr(nodestr, "=");
+		if (end == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+		end[0] = '\0';
+		end = NULL;
+
+		socket_id = strtoul(nodestr, &end, 0);
+		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+
+		/* if we find this page in our mappings, set socket_id */
+		for (i = 0; i < hpi->num_pages[0]; i++) {
+			void *va = (void *)(unsigned long)virt_addr;
+			if (hugepg_tbl[i].orig_va == va) {
+				hugepg_tbl[i].socket_id = socket_id;
+				hp_count++;
+			}
+		}
+	}
+
+	if (hp_count < hpi->num_pages[0])
+		goto error;
+
+	fclose(f);
+	return 0;
+
+error:
+	fclose(f);
+	return -1;
+}
+
+static int
+cmp_physaddr(const void *a, const void *b)
+{
+#ifndef RTE_ARCH_PPC_64
+	const struct hugepage_file *p1 = (const struct hugepage_file *)a;
+	const struct hugepage_file *p2 = (const struct hugepage_file *)b;
+#else
+	/* PowerPC needs memory sorted in reverse order from x86 */
+	const struct hugepage_file *p1 = (const struct hugepage_file *)b;
+	const struct hugepage_file *p2 = (const struct hugepage_file *)a;
+#endif
+	if (p1->physaddr < p2->physaddr)
+		return -1;
+	else if (p1->physaddr > p2->physaddr)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+	void *retval;
+	int fd = open(filename, O_CREAT | O_RDWR, 0666);
+	if (fd < 0)
+		return NULL;
+	if (ftruncate(fd, mem_size) < 0) {
+		close(fd);
+		return NULL;
+	}
+	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	close(fd);
+	return retval;
+}
+
+/*
+ * this copies *active* hugepages from one hugepage table to another.
+ * destination is typically the shared memory.
+ */
+static int
+copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
+		const struct hugepage_file * src, int src_size)
+{
+	int src_pos, dst_pos = 0;
+
+	for (src_pos = 0; src_pos < src_size; src_pos++) {
+		if (src[src_pos].final_va != NULL) {
+			/* error on overflow attempt */
+			if (dst_pos == dest_size)
+				return -1;
+			memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
+			dst_pos++;
+		}
+	}
+	return 0;
+}
+
+static int
+unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
+		unsigned num_hp_info)
+{
+	unsigned socket, size;
+	int page, nrpages = 0;
+
+	/* get total number of hugepages */
+	for (size = 0; size < num_hp_info; size++)
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+			nrpages +=
+			internal_config.hugepage_info[size].num_pages[socket];
+
+	for (page = 0; page < nrpages; page++) {
+		struct hugepage_file *hp = &hugepg_tbl[page];
+
+		if (hp->final_va != NULL && unlink(hp->filepath)) {
+			RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
+				__func__, hp->filepath, strerror(errno));
+		}
+	}
+	return 0;
+}
+
+/*
+ * unmaps hugepages that are not going to be used. since we originally allocate
+ * ALL hugepages (not just those we need), additional unmapping needs to be done.
+ */
+static int
+unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
+		struct hugepage_info *hpi,
+		unsigned num_hp_info)
+{
+	unsigned socket, size;
+	int page, nrpages = 0;
+
+	/* get total number of hugepages */
+	for (size = 0; size < num_hp_info; size++)
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+			nrpages += internal_config.hugepage_info[size].num_pages[socket];
+
+	for (size = 0; size < num_hp_info; size++) {
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+			unsigned pages_found = 0;
+
+			/* traverse until we have unmapped all the unused pages */
+			for (page = 0; page < nrpages; page++) {
+				struct hugepage_file *hp = &hugepg_tbl[page];
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+				/* if this page was already cleared */
+				if (hp->final_va == NULL)
+					continue;
+#endif
+
+				/* find a page that matches the criteria */
+				if ((hp->size == hpi[size].hugepage_sz) &&
+						(hp->socket_id == (int) socket)) {
+
+					/* if we skipped enough pages, unmap the rest */
+					if (pages_found == hpi[size].num_pages[socket]) {
+						uint64_t unmap_len;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+						unmap_len = hp->size * hp->repeated;
+#else
+						unmap_len = hp->size;
+#endif
+
+						/* get start addr and len of the remaining segment */
+						munmap(hp->final_va, (size_t) unmap_len);
+
+						hp->final_va = NULL;
+						if (unlink(hp->filepath) == -1) {
+							RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
+									__func__, hp->filepath, strerror(errno));
+							return -1;
+						}
+					}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+					/* else, check how much do we need to map */
+					else {
+						int nr_pg_left =
+								hpi[size].num_pages[socket] - pages_found;
+
+						/* if we need enough memory to fit into the segment */
+						if (hp->repeated <= nr_pg_left) {
+							pages_found += hp->repeated;
+						}
+						/* truncate the segment */
+						else {
+							uint64_t final_size = nr_pg_left * hp->size;
+							uint64_t seg_size = hp->repeated * hp->size;
+
+							void * unmap_va = RTE_PTR_ADD(hp->final_va,
+									final_size);
+							int fd;
+
+							munmap(unmap_va, seg_size - final_size);
+
+							fd = open(hp->filepath, O_RDWR);
+							if (fd < 0) {
+								RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+										hp->filepath, strerror(errno));
+								return -1;
+							}
+							if (ftruncate(fd, final_size) < 0) {
+								RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
+										hp->filepath, strerror(errno));
+								return -1;
+							}
+							close(fd);
+
+							pages_found += nr_pg_left;
+							hp->repeated = nr_pg_left;
+						}
+					}
+#else
+					/* else, lock the page and skip */
+					else
+						pages_found++;
+#endif
+
+				} /* match page */
+			} /* foreach page */
+		} /* foreach socket */
+	} /* foreach pagesize */
+
+	return 0;
+}
+
+static inline uint64_t
+get_socket_mem_size(int socket)
+{
+	uint64_t size = 0;
+	unsigned i;
+
+	for (i = 0; i < internal_config.num_hugepage_sizes; i++){
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+		if (hpi->hugedir != NULL)
+			size += hpi->hugepage_sz * hpi->num_pages[socket];
+	}
+
+	return size;
+}
+
+/*
+ * This function is a NUMA-aware equivalent of calc_num_pages.
+ * It takes in the list of hugepage sizes and the
+ * number of pages thereof, and calculates the best number of
+ * pages of each size to fulfill the request for <memory> ram
+ */
+static int
+calc_num_pages_per_socket(uint64_t * memory,
+		struct hugepage_info *hp_info,
+		struct hugepage_info *hp_used,
+		unsigned num_hp_info)
+{
+	unsigned socket, j, i = 0;
+	unsigned requested, available;
+	int total_num_pages = 0;
+	uint64_t remaining_mem, cur_mem;
+	uint64_t total_mem = internal_config.memory;
+
+	if (num_hp_info == 0)
+		return -1;
+
+	/* if specific memory amounts per socket weren't requested */
+	if (internal_config.force_sockets == 0) {
+		int cpu_per_socket[RTE_MAX_NUMA_NODES];
+		size_t default_size, total_size;
+		unsigned lcore_id;
+
+		/* Compute number of cores per socket */
+		memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
+		RTE_LCORE_FOREACH(lcore_id) {
+			cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
+		}
+
+		/*
+		 * Automatically spread requested memory amongst detected sockets according
+		 * to number of cores from cpu mask present on each socket
+		 */
+		total_size = internal_config.memory;
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
+
+			/* Set memory amount per socket */
+			default_size = (internal_config.memory * cpu_per_socket[socket])
+			                / rte_lcore_count();
+
+			/* Limit to maximum available memory on socket */
+			default_size = RTE_MIN(default_size, get_socket_mem_size(socket));
+
+			/* Update sizes */
+			memory[socket] = default_size;
+			total_size -= default_size;
+		}
+
+		/*
+		 * If some memory is remaining, try to allocate it by getting all
+		 * available memory from sockets, one after the other
+		 */
+		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
+			/* take whatever is available */
+			default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket],
+			                       total_size);
+
+			/* Update sizes */
+			memory[socket] += default_size;
+			total_size -= default_size;
+		}
+	}
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
+		/* skips if the memory on specific socket wasn't requested */
+		for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
+			hp_used[i].hugedir = hp_info[i].hugedir;
+			hp_used[i].num_pages[socket] = RTE_MIN(
+					memory[socket] / hp_info[i].hugepage_sz,
+					hp_info[i].num_pages[socket]);
+
+			cur_mem = hp_used[i].num_pages[socket] *
+					hp_used[i].hugepage_sz;
+
+			memory[socket] -= cur_mem;
+			total_mem -= cur_mem;
+
+			total_num_pages += hp_used[i].num_pages[socket];
+
+			/* check if we have met all memory requests */
+			if (memory[socket] == 0)
+				break;
+
+			/* check if we have any more pages left at this size, if so
+			 * move on to next size */
+			if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket])
+				continue;
+			/* At this point we know that there are more pages available that are
+			 * bigger than the memory we want, so lets see if we can get enough
+			 * from other page sizes.
+			 */
+			remaining_mem = 0;
+			for (j = i+1; j < num_hp_info; j++)
+				remaining_mem += hp_info[j].hugepage_sz *
+				hp_info[j].num_pages[socket];
+
+			/* is there enough other memory, if not allocate another page and quit */
+			if (remaining_mem < memory[socket]){
+				cur_mem = RTE_MIN(memory[socket],
+						hp_info[i].hugepage_sz);
+				memory[socket] -= cur_mem;
+				total_mem -= cur_mem;
+				hp_used[i].num_pages[socket]++;
+				total_num_pages++;
+				break; /* we are done with this socket*/
+			}
+		}
+		/* if we didn't satisfy all memory requirements per socket */
+		if (memory[socket] > 0) {
+			/* to prevent icc errors */
+			requested = (unsigned) (internal_config.socket_mem[socket] /
+					0x100000);
+			available = requested -
+					((unsigned) (memory[socket] / 0x100000));
+			RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! "
+					"Requested: %uMB, available: %uMB\n", socket,
+					requested, available);
+			return -1;
+		}
+	}
+
+	/* if we didn't satisfy total memory requirements */
+	if (total_mem > 0) {
+		requested = (unsigned) (internal_config.memory / 0x100000);
+		available = requested - (unsigned) (total_mem / 0x100000);
+		RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB,"
+				" available: %uMB\n", requested, available);
+		return -1;
+	}
+	return total_num_pages;
+}
+
+/*
+ * Prepare physical memory mapping: fill configuration structure with
+ * these infos, return 0 on success.
+ *  1. map N huge pages in separate files in hugetlbfs
+ *  2. find associated physical addr
+ *  3. find associated NUMA socket ID
+ *  4. sort all huge pages by physical address
+ *  5. remap these N huge pages in the correct order
+ *  6. unmap the first mapping
+ *  7. fill memsegs in configuration with contiguous zones
+ */
+int
+rte_eal_hugepage_init(void)
+{
+	struct rte_mem_config *mcfg;
+	struct hugepage_file *hugepage, *tmp_hp = NULL;
+	struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+
+	uint64_t memory[RTE_MAX_NUMA_NODES];
+
+	unsigned hp_offset;
+	int i, j, new_memseg;
+	int nr_hugefiles, nr_hugepages = 0;
+	void *addr;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	int new_pages_count[MAX_HUGEPAGE_SIZES];
+#endif
+
+	test_proc_pagemap_readable();
+
+	memset(used_hp, 0, sizeof(used_hp));
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* hugetlbfs can be disabled */
+	if (internal_config.no_hugetlbfs) {
+		addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		if (addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+					strerror(errno));
+			return -1;
+		}
+		mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+		mcfg->memseg[0].addr = addr;
+		mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
+		mcfg->memseg[0].len = internal_config.memory;
+		mcfg->memseg[0].socket_id = 0;
+		return 0;
+	}
+
+/* check if app runs on Xen Dom0 */
+	if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+		/* use dom0_mm kernel driver to init memory */
+		if (rte_xen_dom0_memory_init() < 0)
+			return -1;
+		else
+			return 0;
+#endif
+	}
+
+	/* calculate total number of hugepages available. at this point we haven't
+	 * yet started sorting them so they all are on socket 0 */
+	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+		/* meanwhile, also initialize used_hp hugepage sizes in used_hp */
+		used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
+
+		nr_hugepages += internal_config.hugepage_info[i].num_pages[0];
+	}
+
+	/*
+	 * allocate a memory area for hugepage table.
+	 * this isn't shared memory yet. due to the fact that we need some
+	 * processing done on these pages, shared memory will be created
+	 * at a later stage.
+	 */
+	tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
+	if (tmp_hp == NULL)
+		goto fail;
+
+	memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
+
+	hp_offset = 0; /* where we start the current page size entries */
+
+	/* map all hugepages and sort them */
+	for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
+		struct hugepage_info *hpi;
+
+		/*
+		 * we don't yet mark hugepages as used at this stage, so
+		 * we just map all hugepages available to the system
+		 * all hugepages are still located on socket 0
+		 */
+		hpi = &internal_config.hugepage_info[i];
+
+		if (hpi->num_pages[0] == 0)
+			continue;
+
+		/* map all hugepages available */
+		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){
+			RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",
+					(unsigned)(hpi->hugepage_sz / 0x100000));
+			goto fail;
+		}
+
+		/* find physical addresses and sockets for each hugepage */
+		if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
+			RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
+					(unsigned)(hpi->hugepage_sz / 0x100000));
+			goto fail;
+		}
+
+		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
+			RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
+					(unsigned)(hpi->hugepage_sz / 0x100000));
+			goto fail;
+		}
+
+		qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
+		      sizeof(struct hugepage_file), cmp_physaddr);
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+		/* remap all hugepages into single file segments */
+		new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
+		if (new_pages_count[i] < 0){
+			RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+					(unsigned)(hpi->hugepage_sz / 0x100000));
+			goto fail;
+		}
+
+		/* we have processed a num of hugepages of this size, so inc offset */
+		hp_offset += new_pages_count[i];
+#else
+		/* remap all hugepages */
+		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
+			RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+					(unsigned)(hpi->hugepage_sz / 0x100000));
+			goto fail;
+		}
+
+		/* unmap original mappings */
+		if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0)
+			goto fail;
+
+		/* we have processed a num of hugepages of this size, so inc offset */
+		hp_offset += hpi->num_pages[0];
+#endif
+	}
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	nr_hugefiles = 0;
+	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+		nr_hugefiles += new_pages_count[i];
+	}
+#else
+	nr_hugefiles = nr_hugepages;
+#endif
+
+
+	/* clean out the numbers of pages */
+	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
+		for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
+			internal_config.hugepage_info[i].num_pages[j] = 0;
+
+	/* get hugepages for each socket */
+	for (i = 0; i < nr_hugefiles; i++) {
+		int socket = tmp_hp[i].socket_id;
+
+		/* find a hugepage info with right size and increment num_pages */
+		const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES,
+				(int)internal_config.num_hugepage_sizes);
+		for (j = 0; j < nb_hpsizes; j++) {
+			if (tmp_hp[i].size ==
+					internal_config.hugepage_info[j].hugepage_sz) {
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+					internal_config.hugepage_info[j].num_pages[socket] +=
+						tmp_hp[i].repeated;
+#else
+				internal_config.hugepage_info[j].num_pages[socket]++;
+#endif
+			}
+		}
+	}
+
+	/* make a copy of socket_mem, needed for number of pages calculation */
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+		memory[i] = internal_config.socket_mem[i];
+
+	/* calculate final number of pages */
+	nr_hugepages = calc_num_pages_per_socket(memory,
+			internal_config.hugepage_info, used_hp,
+			internal_config.num_hugepage_sizes);
+
+	/* error if not enough memory available */
+	if (nr_hugepages < 0)
+		goto fail;
+
+	/* reporting in! */
+	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+		for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+			if (used_hp[i].num_pages[j] > 0) {
+				RTE_LOG(DEBUG, EAL,
+					"Requesting %u pages of size %uMB"
+					" from socket %i\n",
+					used_hp[i].num_pages[j],
+					(unsigned)
+					(used_hp[i].hugepage_sz / 0x100000),
+					j);
+			}
+		}
+	}
+
+	/* create shared memory */
+	hugepage = create_shared_memory(eal_hugepage_info_path(),
+			nr_hugefiles * sizeof(struct hugepage_file));
+
+	if (hugepage == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+		goto fail;
+	}
+	memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
+
+	/*
+	 * unmap pages that we won't need (looks at used_hp).
+	 * also, sets final_va to NULL on pages that were unmapped.
+	 */
+	if (unmap_unneeded_hugepages(tmp_hp, used_hp,
+			internal_config.num_hugepage_sizes) < 0) {
+		RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
+		goto fail;
+	}
+
+	/*
+	 * copy stuff from malloc'd hugepage* to the actual shared memory.
+	 * this procedure only copies those hugepages that have final_va
+	 * not NULL. has overflow protection.
+	 */
+	if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
+			tmp_hp, nr_hugefiles) < 0) {
+		RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
+		goto fail;
+	}
+
+	/* free the hugepage backing files */
+	if (internal_config.hugepage_unlink &&
+		unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) {
+		RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n");
+		goto fail;
+	}
+
+	/* free the temporary hugepage table */
+	free(tmp_hp);
+	tmp_hp = NULL;
+
+	/* find earliest free memseg - this is needed because in case of IVSHMEM,
+	 * segments might have already been initialized */
+	for (j = 0; j < RTE_MAX_MEMSEG; j++)
+		if (mcfg->memseg[j].addr == NULL) {
+			/* move to previous segment and exit loop */
+			j--;
+			break;
+		}
+
+	for (i = 0; i < nr_hugefiles; i++) {
+		new_memseg = 0;
+
+		/* if this is a new section, create a new memseg */
+		if (i == 0)
+			new_memseg = 1;
+		else if (hugepage[i].socket_id != hugepage[i-1].socket_id)
+			new_memseg = 1;
+		else if (hugepage[i].size != hugepage[i-1].size)
+			new_memseg = 1;
+
+#ifdef RTE_ARCH_PPC_64
+		/* On PPC64 architecture, the mmap always start from higher
+		 * virtual address to lower address. Here, both the physical
+		 * address and virtual address are in descending order */
+		else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) !=
+		    hugepage[i].size)
+			new_memseg = 1;
+		else if (((unsigned long)hugepage[i-1].final_va -
+		    (unsigned long)hugepage[i].final_va) != hugepage[i].size)
+			new_memseg = 1;
+#else
+		else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
+		    hugepage[i].size)
+			new_memseg = 1;
+		else if (((unsigned long)hugepage[i].final_va -
+		    (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)
+			new_memseg = 1;
+#endif
+
+		if (new_memseg) {
+			j += 1;
+			if (j == RTE_MAX_MEMSEG)
+				break;
+
+			mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+			mcfg->memseg[j].addr = hugepage[i].final_va;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+			mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
+#else
+			mcfg->memseg[j].len = hugepage[i].size;
+#endif
+			mcfg->memseg[j].socket_id = hugepage[i].socket_id;
+			mcfg->memseg[j].hugepage_sz = hugepage[i].size;
+		}
+		/* continuation of previous memseg */
+		else {
+#ifdef RTE_ARCH_PPC_64
+		/* Use the phy and virt address of the last page as segment
+		 * address for IBM Power architecture */
+			mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+			mcfg->memseg[j].addr = hugepage[i].final_va;
+#endif
+			mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
+		}
+		hugepage[i].memseg_id = j;
+	}
+
+	if (i < nr_hugefiles) {
+		RTE_LOG(ERR, EAL, "Can only reserve %d pages "
+			"from %d requested\n"
+			"Current %s=%d is not enough\n"
+			"Please either increase it or request less amount "
+			"of memory.\n",
+			i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
+			RTE_MAX_MEMSEG);
+		return -ENOMEM;
+	}
+
+	return 0;
+
+fail:
+	free(tmp_hp);
+	return -1;
+}
+
+/*
+ * uses fstat to report the size of a file on disk
+ */
+static off_t
+getFileSize(int fd)
+{
+	struct stat st;
+	if (fstat(fd, &st) < 0)
+		return 0;
+	return st.st_size;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration and finds the hugepages which form that segment, mapping them
+ * in order to form a contiguous block in the virtual memory space
+ */
+int
+rte_eal_hugepage_attach(void)
+{
+	const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	const struct hugepage_file *hp = NULL;
+	unsigned num_hp = 0;
+	unsigned i, s = 0; /* s used to track the segment number */
+	off_t size;
+	int fd, fd_zero = -1, fd_hugepage = -1;
+
+	if (aslr_enabled() > 0) {
+		RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
+				"(ASLR) is enabled in the kernel.\n");
+		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
+				"into secondary processes\n");
+	}
+
+	test_proc_pagemap_readable();
+
+	if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+		if (rte_xen_dom0_memory_attach() < 0) {
+			RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay "
+					"process\n");
+			return -1;
+		}
+		return 0;
+#endif
+	}
+
+	fd_zero = open("/dev/zero", O_RDONLY);
+	if (fd_zero < 0) {
+		RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
+		goto error;
+	}
+	fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
+	if (fd_hugepage < 0) {
+		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
+		goto error;
+	}
+
+	/* map all segments into memory to make sure we get the addrs */
+	for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
+		void *base_addr;
+
+		/*
+		 * the first memory segment with len==0 is the one that
+		 * follows the last valid segment.
+		 */
+		if (mcfg->memseg[s].len == 0)
+			break;
+
+#ifdef RTE_LIBRTE_IVSHMEM
+		/*
+		 * if segment has ioremap address set, it's an IVSHMEM segment and
+		 * doesn't need mapping as it was already mapped earlier
+		 */
+		if (mcfg->memseg[s].ioremap_addr != 0)
+			continue;
+#endif
+
+		/*
+		 * fdzero is mmapped to get a contiguous block of virtual
+		 * addresses of the appropriate memseg size.
+		 * use mmap to get identical addresses as the primary process.
+		 */
+		base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
+				 PROT_READ, MAP_PRIVATE, fd_zero, 0);
+		if (base_addr == MAP_FAILED ||
+		    base_addr != mcfg->memseg[s].addr) {
+			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+				"in /dev/zero to requested address [%p]: '%s'\n",
+				(unsigned long long)mcfg->memseg[s].len,
+				mcfg->memseg[s].addr, strerror(errno));
+			if (aslr_enabled() > 0) {
+				RTE_LOG(ERR, EAL, "It is recommended to "
+					"disable ASLR in the kernel "
+					"and retry running both primary "
+					"and secondary processes\n");
+			}
+			goto error;
+		}
+	}
+
+	size = getFileSize(fd_hugepage);
+	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
+	if (hp == NULL) {
+		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
+		goto error;
+	}
+
+	num_hp = size / sizeof(struct hugepage_file);
+	RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
+
+	s = 0;
+	while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
+		void *addr, *base_addr;
+		uintptr_t offset = 0;
+		size_t mapping_size;
+#ifdef RTE_LIBRTE_IVSHMEM
+		/*
+		 * if segment has ioremap address set, it's an IVSHMEM segment and
+		 * doesn't need mapping as it was already mapped earlier
+		 */
+		if (mcfg->memseg[s].ioremap_addr != 0) {
+			s++;
+			continue;
+		}
+#endif
+		/*
+		 * free previously mapped memory so we can map the
+		 * hugepages into the space
+		 */
+		base_addr = mcfg->memseg[s].addr;
+		munmap(base_addr, mcfg->memseg[s].len);
+
+		/* find the hugepages for this segment and map them
+		 * we don't need to worry about order, as the server sorted the
+		 * entries before it did the second mmap of them */
+		for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){
+			if (hp[i].memseg_id == (int)s){
+				fd = open(hp[i].filepath, O_RDWR);
+				if (fd < 0) {
+					RTE_LOG(ERR, EAL, "Could not open %s\n",
+						hp[i].filepath);
+					goto error;
+				}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+				mapping_size = hp[i].size * hp[i].repeated;
+#else
+				mapping_size = hp[i].size;
+#endif
+				addr = mmap(RTE_PTR_ADD(base_addr, offset),
+						mapping_size, PROT_READ | PROT_WRITE,
+						MAP_SHARED, fd, 0);
+				close(fd); /* close file both on success and on failure */
+				if (addr == MAP_FAILED ||
+						addr != RTE_PTR_ADD(base_addr, offset)) {
+					RTE_LOG(ERR, EAL, "Could not mmap %s\n",
+						hp[i].filepath);
+					goto error;
+				}
+				offset+=mapping_size;
+			}
+		}
+		RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
+				(unsigned long long)mcfg->memseg[s].len);
+		s++;
+	}
+	/* unmap the hugepage config file, since we are done using it */
+	munmap((void *)(uintptr_t)hp, size);
+	close(fd_zero);
+	close(fd_hugepage);
+	return 0;
+
+error:
+	if (fd_zero >= 0)
+		close(fd_zero);
+	if (fd_hugepage >= 0)
+		close(fd_hugepage);
+	return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
new file mode 100644
index 00000000..dbf12a84
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -0,0 +1,762 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+#include "eal_pci_init.h"
+
+/**
+ * @file
+ * PCI probing under linux
+ *
+ * This code is used to simulate a PCI probe by parsing information in sysfs.
+ * When a registered device matches a driver, it is then initialized with
+ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it).
+ */
+
+/* unbind kernel driver for this device */
+int
+pci_unbind_kernel_driver(struct rte_pci_device *dev)
+{
+	int n;
+	FILE *f;
+	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	struct rte_pci_addr *loc = &dev->addr;
+
+	/* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
+	snprintf(filename, sizeof(filename),
+	         SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
+	         loc->domain, loc->bus, loc->devid, loc->function);
+
+	f = fopen(filename, "w");
+	if (f == NULL) /* device was not bound */
+		return 0;
+
+	n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+	             loc->domain, loc->bus, loc->devid, loc->function);
+	if ((n < 0) || (n >= (int)sizeof(buf))) {
+		RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+		goto error;
+	}
+	if (fwrite(buf, n, 1, f) == 0) {
+		RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__,
+				filename);
+		goto error;
+	}
+
+	fclose(f);
+	return 0;
+
+error:
+	fclose(f);
+	return -1;
+}
+
+static int
+pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
+{
+	int count;
+	char path[PATH_MAX];
+	char *name;
+
+	if (!filename || !dri_name)
+		return -1;
+
+	count = readlink(filename, path, PATH_MAX);
+	if (count >= PATH_MAX)
+		return -1;
+
+	/* For device does not have a driver */
+	if (count < 0)
+		return 1;
+
+	path[count] = '\0';
+
+	name = strrchr(path, '/');
+	if (name) {
+		strncpy(dri_name, name + 1, strlen(name + 1) + 1);
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Map pci device */
+int
+rte_eal_pci_map_device(struct rte_pci_device *dev)
+{
+	int ret = -1;
+
+	/* try mapping the NIC resources using VFIO if it exists */
+	switch (dev->kdrv) {
+	case RTE_KDRV_VFIO:
+#ifdef VFIO_PRESENT
+		if (pci_vfio_is_enabled())
+			ret = pci_vfio_map_resource(dev);
+#endif
+		break;
+	case RTE_KDRV_IGB_UIO:
+	case RTE_KDRV_UIO_GENERIC:
+		/* map resources for devices that use uio */
+		ret = pci_uio_map_resource(dev);
+		break;
+	default:
+		RTE_LOG(DEBUG, EAL,
+			"  Not managed by a supported kernel driver, skipped\n");
+		ret = 1;
+		break;
+	}
+
+	return ret;
+}
+
+/* Unmap pci device */
+void
+rte_eal_pci_unmap_device(struct rte_pci_device *dev)
+{
+	/* try unmapping the NIC resources using VFIO if it exists */
+	switch (dev->kdrv) {
+	case RTE_KDRV_VFIO:
+		RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n");
+		break;
+	case RTE_KDRV_IGB_UIO:
+	case RTE_KDRV_UIO_GENERIC:
+		/* unmap resources for devices that use uio */
+		pci_uio_unmap_resource(dev);
+		break;
+	default:
+		RTE_LOG(DEBUG, EAL,
+			"  Not managed by a supported kernel driver, skipped\n");
+		break;
+	}
+}
+
+void *
+pci_find_max_end_va(void)
+{
+	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+	const struct rte_memseg *last = seg;
+	unsigned i = 0;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
+		if (seg->addr == NULL)
+			break;
+
+		if (seg->addr > last->addr)
+			last = seg;
+
+	}
+	return RTE_PTR_ADD(last->addr, last->len);
+}
+
+/* parse the "resource" sysfs file */
+static int
+pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	union pci_resource_info {
+		struct {
+			char *phys_addr;
+			char *end_addr;
+			char *flags;
+		};
+		char *ptrs[PCI_RESOURCE_FMT_NVAL];
+	} res_info;
+	int i;
+	uint64_t phys_addr, end_addr, flags;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
+		return -1;
+	}
+
+	for (i = 0; i<PCI_MAX_RESOURCE; i++) {
+
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot read resource\n", __func__);
+			goto error;
+		}
+
+		if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) {
+			RTE_LOG(ERR, EAL,
+				"%s(): bad resource format\n", __func__);
+			goto error;
+		}
+		errno = 0;
+		phys_addr = strtoull(res_info.phys_addr, NULL, 16);
+		end_addr = strtoull(res_info.end_addr, NULL, 16);
+		flags = strtoull(res_info.flags, NULL, 16);
+		if (errno != 0) {
+			RTE_LOG(ERR, EAL,
+				"%s(): bad resource format\n", __func__);
+			goto error;
+		}
+
+		if (flags & IORESOURCE_MEM) {
+			dev->mem_resource[i].phys_addr = phys_addr;
+			dev->mem_resource[i].len = end_addr - phys_addr + 1;
+			/* not mapped for now */
+			dev->mem_resource[i].addr = NULL;
+		}
+	}
+	fclose(f);
+	return 0;
+
+error:
+	fclose(f);
+	return -1;
+}
+
+/* Scan one pci sysfs entry, and fill the devices list from it. */
+static int
+pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
+	     uint8_t devid, uint8_t function)
+{
+	char filename[PATH_MAX];
+	unsigned long tmp;
+	struct rte_pci_device *dev;
+	char driver[PATH_MAX];
+	int ret;
+
+	dev = malloc(sizeof(*dev));
+	if (dev == NULL)
+		return -1;
+
+	memset(dev, 0, sizeof(*dev));
+	dev->addr.domain = domain;
+	dev->addr.bus = bus;
+	dev->addr.devid = devid;
+	dev->addr.function = function;
+
+	/* get vendor id */
+	snprintf(filename, sizeof(filename), "%s/vendor", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		free(dev);
+		return -1;
+	}
+	dev->id.vendor_id = (uint16_t)tmp;
+
+	/* get device id */
+	snprintf(filename, sizeof(filename), "%s/device", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		free(dev);
+		return -1;
+	}
+	dev->id.device_id = (uint16_t)tmp;
+
+	/* get subsystem_vendor id */
+	snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
+		 dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		free(dev);
+		return -1;
+	}
+	dev->id.subsystem_vendor_id = (uint16_t)tmp;
+
+	/* get subsystem_device id */
+	snprintf(filename, sizeof(filename), "%s/subsystem_device",
+		 dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		free(dev);
+		return -1;
+	}
+	dev->id.subsystem_device_id = (uint16_t)tmp;
+
+	/* get max_vfs */
+	dev->max_vfs = 0;
+	snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
+	if (!access(filename, F_OK) &&
+	    eal_parse_sysfs_value(filename, &tmp) == 0)
+		dev->max_vfs = (uint16_t)tmp;
+	else {
+		/* for non igb_uio driver, need kernel version >= 3.8 */
+		snprintf(filename, sizeof(filename),
+			 "%s/sriov_numvfs", dirname);
+		if (!access(filename, F_OK) &&
+		    eal_parse_sysfs_value(filename, &tmp) == 0)
+			dev->max_vfs = (uint16_t)tmp;
+	}
+
+	/* get numa node */
+	snprintf(filename, sizeof(filename), "%s/numa_node",
+		 dirname);
+	if (access(filename, R_OK) != 0) {
+		/* if no NUMA support, set default to 0 */
+		dev->numa_node = 0;
+	} else {
+		if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+			free(dev);
+			return -1;
+		}
+		dev->numa_node = tmp;
+	}
+
+	/* parse resources */
+	snprintf(filename, sizeof(filename), "%s/resource", dirname);
+	if (pci_parse_sysfs_resource(filename, dev) < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
+		free(dev);
+		return -1;
+	}
+
+	/* parse driver */
+	snprintf(filename, sizeof(filename), "%s/driver", dirname);
+	ret = pci_get_kernel_driver_by_path(filename, driver);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "Fail to get kernel driver\n");
+		free(dev);
+		return -1;
+	}
+
+	if (!ret) {
+		if (!strcmp(driver, "vfio-pci"))
+			dev->kdrv = RTE_KDRV_VFIO;
+		else if (!strcmp(driver, "igb_uio"))
+			dev->kdrv = RTE_KDRV_IGB_UIO;
+		else if (!strcmp(driver, "uio_pci_generic"))
+			dev->kdrv = RTE_KDRV_UIO_GENERIC;
+		else
+			dev->kdrv = RTE_KDRV_UNKNOWN;
+	} else
+		dev->kdrv = RTE_KDRV_NONE;
+
+	/* device is valid, add in list (sorted) */
+	if (TAILQ_EMPTY(&pci_device_list)) {
+		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+	} else {
+		struct rte_pci_device *dev2;
+		int ret;
+
+		TAILQ_FOREACH(dev2, &pci_device_list, next) {
+			ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
+			if (ret > 0)
+				continue;
+
+			if (ret < 0) {
+				TAILQ_INSERT_BEFORE(dev2, dev, next);
+			} else { /* already registered */
+				dev2->kdrv = dev->kdrv;
+				dev2->max_vfs = dev->max_vfs;
+				memmove(dev2->mem_resource, dev->mem_resource,
+					sizeof(dev->mem_resource));
+				free(dev);
+			}
+			return 0;
+		}
+		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+	}
+
+	return 0;
+}
+
+/*
+ * split up a pci address into its constituent parts.
+ */
+static int
+parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
+		uint8_t *bus, uint8_t *devid, uint8_t *function)
+{
+	/* first split on ':' */
+	union splitaddr {
+		struct {
+			char *domain;
+			char *bus;
+			char *devid;
+			char *function;
+		};
+		char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */
+	} splitaddr;
+
+	char *buf_copy = strndup(buf, bufsize);
+	if (buf_copy == NULL)
+		return -1;
+
+	if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':')
+			!= PCI_FMT_NVAL - 1)
+		goto error;
+	/* final split is on '.' between devid and function */
+	splitaddr.function = strchr(splitaddr.devid,'.');
+	if (splitaddr.function == NULL)
+		goto error;
+	*splitaddr.function++ = '\0';
+
+	/* now convert to int values */
+	errno = 0;
+	*domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
+	*bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
+	*devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
+	*function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
+	if (errno != 0)
+		goto error;
+
+	free(buf_copy); /* free the copy made with strdup */
+	return 0;
+error:
+	free(buf_copy);
+	return -1;
+}
+
+/*
+ * Scan the content of the PCI bus, and the devices in the devices
+ * list
+ */
+int
+rte_eal_pci_scan(void)
+{
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	uint16_t domain;
+	uint8_t bus, devid, function;
+
+	dir = opendir(SYSFS_PCI_DEVICES);
+	if (dir == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		if (e->d_name[0] == '.')
+			continue;
+
+		if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain,
+				&bus, &devid, &function) != 0)
+			continue;
+
+		snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES,
+			 e->d_name);
+		if (pci_scan_one(dirname, domain, bus, devid, function) < 0)
+			goto error;
+	}
+	closedir(dir);
+	return 0;
+
+error:
+	closedir(dir);
+	return -1;
+}
+
+#ifdef RTE_PCI_CONFIG
+/*
+ * It is deprecated, all its configurations have been moved into
+ * each PMD respectively.
+ */
+void
+pci_config_space_set(__rte_unused struct rte_pci_device *dev)
+{
+	RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n");
+}
+#endif
+
+/* Read PCI config space. */
+int rte_eal_pci_read_config(const struct rte_pci_device *device,
+			    void *buf, size_t len, off_t offset)
+{
+	const struct rte_intr_handle *intr_handle = &device->intr_handle;
+
+	switch (intr_handle->type) {
+	case RTE_INTR_HANDLE_UIO:
+	case RTE_INTR_HANDLE_UIO_INTX:
+		return pci_uio_read_config(intr_handle, buf, len, offset);
+
+#ifdef VFIO_PRESENT
+	case RTE_INTR_HANDLE_VFIO_MSIX:
+	case RTE_INTR_HANDLE_VFIO_MSI:
+	case RTE_INTR_HANDLE_VFIO_LEGACY:
+		return pci_vfio_read_config(intr_handle, buf, len, offset);
+#endif
+	default:
+		RTE_LOG(ERR, EAL,
+			"Unknown handle type of fd %d\n",
+					intr_handle->fd);
+		return -1;
+	}
+}
+
+/* Write PCI config space. */
+int rte_eal_pci_write_config(const struct rte_pci_device *device,
+			     const void *buf, size_t len, off_t offset)
+{
+	const struct rte_intr_handle *intr_handle = &device->intr_handle;
+
+	switch (intr_handle->type) {
+	case RTE_INTR_HANDLE_UIO:
+	case RTE_INTR_HANDLE_UIO_INTX:
+		return pci_uio_write_config(intr_handle, buf, len, offset);
+
+#ifdef VFIO_PRESENT
+	case RTE_INTR_HANDLE_VFIO_MSIX:
+	case RTE_INTR_HANDLE_VFIO_MSI:
+	case RTE_INTR_HANDLE_VFIO_LEGACY:
+		return pci_vfio_write_config(intr_handle, buf, len, offset);
+#endif
+	default:
+		RTE_LOG(ERR, EAL,
+			"Unknown handle type of fd %d\n",
+					intr_handle->fd);
+		return -1;
+	}
+}
+
+#if defined(RTE_ARCH_X86)
+static int
+pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
+	       struct rte_pci_ioport *p)
+{
+	uint16_t start, end;
+	FILE *fp;
+	char *line = NULL;
+	char pci_id[16];
+	int found = 0;
+	size_t linesz;
+
+	snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
+		 dev->addr.domain, dev->addr.bus,
+		 dev->addr.devid, dev->addr.function);
+
+	fp = fopen("/proc/ioports", "r");
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
+		return -1;
+	}
+
+	while (getdelim(&line, &linesz, '\n', fp) > 0) {
+		char *ptr = line;
+		char *left;
+		int n;
+
+		n = strcspn(ptr, ":");
+		ptr[n] = 0;
+		left = &ptr[n + 1];
+
+		while (*left && isspace(*left))
+			left++;
+
+		if (!strncmp(left, pci_id, strlen(pci_id))) {
+			found = 1;
+
+			while (*ptr && isspace(*ptr))
+				ptr++;
+
+			sscanf(ptr, "%04hx-%04hx", &start, &end);
+
+			break;
+		}
+	}
+
+	free(line);
+	fclose(fp);
+
+	if (!found)
+		return -1;
+
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	p->base = start;
+	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
+
+	return 0;
+}
+#endif
+
+int
+rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
+		       struct rte_pci_ioport *p)
+{
+	int ret = -1;
+
+	switch (dev->kdrv) {
+#ifdef VFIO_PRESENT
+	case RTE_KDRV_VFIO:
+		if (pci_vfio_is_enabled())
+			ret = pci_vfio_ioport_map(dev, bar, p);
+		break;
+#endif
+	case RTE_KDRV_IGB_UIO:
+		ret = pci_uio_ioport_map(dev, bar, p);
+		break;
+	case RTE_KDRV_UIO_GENERIC:
+#if defined(RTE_ARCH_X86)
+		ret = pci_ioport_map(dev, bar, p);
+#else
+		ret = pci_uio_ioport_map(dev, bar, p);
+#endif
+		break;
+	case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+		ret = pci_ioport_map(dev, bar, p);
+#endif
+		break;
+	default:
+		break;
+	}
+
+	if (!ret)
+		p->dev = dev;
+
+	return ret;
+}
+
+void
+rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
+			void *data, size_t len, off_t offset)
+{
+	switch (p->dev->kdrv) {
+#ifdef VFIO_PRESENT
+	case RTE_KDRV_VFIO:
+		pci_vfio_ioport_read(p, data, len, offset);
+		break;
+#endif
+	case RTE_KDRV_IGB_UIO:
+		pci_uio_ioport_read(p, data, len, offset);
+		break;
+	case RTE_KDRV_UIO_GENERIC:
+		pci_uio_ioport_read(p, data, len, offset);
+		break;
+	case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+		pci_uio_ioport_read(p, data, len, offset);
+#endif
+		break;
+	default:
+		break;
+	}
+}
+
+void
+rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
+			 const void *data, size_t len, off_t offset)
+{
+	switch (p->dev->kdrv) {
+#ifdef VFIO_PRESENT
+	case RTE_KDRV_VFIO:
+		pci_vfio_ioport_write(p, data, len, offset);
+		break;
+#endif
+	case RTE_KDRV_IGB_UIO:
+		pci_uio_ioport_write(p, data, len, offset);
+		break;
+	case RTE_KDRV_UIO_GENERIC:
+		pci_uio_ioport_write(p, data, len, offset);
+		break;
+	case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+		pci_uio_ioport_write(p, data, len, offset);
+#endif
+		break;
+	default:
+		break;
+	}
+}
+
+int
+rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
+{
+	int ret = -1;
+
+	switch (p->dev->kdrv) {
+#ifdef VFIO_PRESENT
+	case RTE_KDRV_VFIO:
+		if (pci_vfio_is_enabled())
+			ret = pci_vfio_ioport_unmap(p);
+		break;
+#endif
+	case RTE_KDRV_IGB_UIO:
+		ret = pci_uio_ioport_unmap(p);
+		break;
+	case RTE_KDRV_UIO_GENERIC:
+#if defined(RTE_ARCH_X86)
+		ret = 0;
+#else
+		ret = pci_uio_ioport_unmap(p);
+#endif
+		break;
+	case RTE_KDRV_NONE:
+#if defined(RTE_ARCH_X86)
+		ret = 0;
+#endif
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/* Init the PCI EAL subsystem */
+int
+rte_eal_pci_init(void)
+{
+	TAILQ_INIT(&pci_driver_list);
+	TAILQ_INIT(&pci_device_list);
+
+	/* for debug purposes, PCI can be disabled */
+	if (internal_config.no_pci)
+		return 0;
+
+	if (rte_eal_pci_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
+		return -1;
+	}
+#ifdef VFIO_PRESENT
+	pci_vfio_enable();
+
+	if (pci_vfio_is_enabled()) {
+
+		/* if we are primary process, create a thread to communicate with
+		 * secondary processes. the thread will use a socket to wait for
+		 * requests from secondary process to send open file descriptors,
+		 * because VFIO does not allow multiple open descriptors on a group or
+		 * VFIO container.
+		 */
+		if (internal_config.process_type == RTE_PROC_PRIMARY &&
+				pci_vfio_mp_sync_setup() < 0)
+			return -1;
+	}
+#endif
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
new file mode 100644
index 00000000..7011753d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -0,0 +1,127 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_PCI_INIT_H_
+#define EAL_PCI_INIT_H_
+
+#include "eal_vfio.h"
+
+/*
+ * Helper function to map PCI resources right after hugepages in virtual memory
+ */
+extern void *pci_map_addr;
+void *pci_find_max_end_va(void);
+
+int pci_uio_alloc_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource **uio_res);
+void pci_uio_free_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource *uio_res);
+int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+		struct mapped_pci_resource *uio_res, int map_idx);
+
+int pci_uio_read_config(const struct rte_intr_handle *intr_handle,
+			void *buf, size_t len, off_t offs);
+int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
+			 const void *buf, size_t len, off_t offs);
+
+int pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
+		       struct rte_pci_ioport *p);
+void pci_uio_ioport_read(struct rte_pci_ioport *p,
+			 void *data, size_t len, off_t offset);
+void pci_uio_ioport_write(struct rte_pci_ioport *p,
+			  const void *data, size_t len, off_t offset);
+int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
+
+#ifdef VFIO_PRESENT
+
+#define VFIO_MAX_GROUPS 64
+
+int pci_vfio_enable(void);
+int pci_vfio_is_enabled(void);
+int pci_vfio_mp_sync_setup(void);
+
+/* access config space */
+int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+			 void *buf, size_t len, off_t offs);
+int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+			  const void *buf, size_t len, off_t offs);
+
+int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
+		        struct rte_pci_ioport *p);
+void pci_vfio_ioport_read(struct rte_pci_ioport *p,
+			  void *data, size_t len, off_t offset);
+void pci_vfio_ioport_write(struct rte_pci_ioport *p,
+			   const void *data, size_t len, off_t offset);
+int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
+
+/* map VFIO resource prototype */
+int pci_vfio_map_resource(struct rte_pci_device *dev);
+int pci_vfio_get_group_fd(int iommu_group_fd);
+int pci_vfio_get_container_fd(void);
+
+/*
+ * Function prototypes for VFIO multiprocess sync functions
+ */
+int vfio_mp_sync_send_request(int socket, int req);
+int vfio_mp_sync_receive_request(int socket);
+int vfio_mp_sync_send_fd(int socket, int fd);
+int vfio_mp_sync_receive_fd(int socket);
+int vfio_mp_sync_connect_to_primary(void);
+
+/* socket comm protocol definitions */
+#define SOCKET_REQ_CONTAINER 0x100
+#define SOCKET_REQ_GROUP 0x200
+#define SOCKET_OK 0x0
+#define SOCKET_NO_FD 0x1
+#define SOCKET_ERR 0xFF
+
+/*
+ * we don't need to store device fd's anywhere since they can be obtained from
+ * the group fd via an ioctl() call.
+ */
+struct vfio_group {
+	int group_no;
+	int fd;
+};
+
+struct vfio_config {
+	int vfio_enabled;
+	int vfio_container_fd;
+	int vfio_container_has_dma;
+	int vfio_group_idx;
+	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+};
+
+#endif
+
+#endif /* EAL_PCI_INIT_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
new file mode 100644
index 00000000..068694dc
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -0,0 +1,491 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/pci_regs.h>
+
+#if defined(RTE_ARCH_X86)
+#include <sys/io.h>
+#endif
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+
+void *pci_map_addr = NULL;
+
+#define OFF_MAX              ((uint64_t)(off_t)-1)
+
+int
+pci_uio_read_config(const struct rte_intr_handle *intr_handle,
+		    void *buf, size_t len, off_t offset)
+{
+	return pread(intr_handle->uio_cfg_fd, buf, len, offset);
+}
+
+int
+pci_uio_write_config(const struct rte_intr_handle *intr_handle,
+		     const void *buf, size_t len, off_t offset)
+{
+	return pwrite(intr_handle->uio_cfg_fd, buf, len, offset);
+}
+
+static int
+pci_uio_set_bus_master(int dev_fd)
+{
+	uint16_t reg;
+	int ret;
+
+	ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL,
+			"Cannot read command from PCI config space!\n");
+		return -1;
+	}
+
+	/* return if bus mastering is already on */
+	if (reg & PCI_COMMAND_MASTER)
+		return 0;
+
+	reg |= PCI_COMMAND_MASTER;
+
+	ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL,
+			"Cannot write command to PCI config space!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
+{
+	FILE *f;
+	char filename[PATH_MAX];
+	int ret;
+	unsigned major, minor;
+	dev_t dev;
+
+	/* get the name of the sysfs file that contains the major and minor
+	 * of the uio device and read its content */
+	snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path);
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n",
+			__func__);
+		return -1;
+	}
+
+	ret = fscanf(f, "%u:%u", &major, &minor);
+	if (ret != 2) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n",
+			__func__);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+
+	/* create the char device "mknod /dev/uioX c major minor" */
+	snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
+	dev = makedev(major, minor);
+	ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Return the uioX char device used for a pci device. On success, return
+ * the UIO number and fill dstbuf string with the path of the device in
+ * sysfs. On error, return a negative value. In this case dstbuf is
+ * invalid.
+ */
+static int
+pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
+			   unsigned int buflen, int create)
+{
+	struct rte_pci_addr *loc = &dev->addr;
+	unsigned int uio_num;
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+
+	snprintf(dirname, sizeof(dirname),
+			SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
+			loc->domain, loc->bus, loc->devid, loc->function);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+				SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
+				loc->domain, loc->bus, loc->devid, loc->function);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		/* format could be uio%d ...*/
+		int shortprefix_len = sizeof("uio") - 1;
+		/* ... or uio:uio%d */
+		int longprefix_len = sizeof("uio:uio") - 1;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		/* first try uio%d */
+		errno = 0;
+		uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
+			snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
+			break;
+		}
+
+		/* then try uio:uio%d */
+		errno = 0;
+		uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
+			snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found */
+	if (e == NULL)
+		return -1;
+
+	/* create uio device if we've been asked to */
+	if (internal_config.create_uio_dev && create &&
+			pci_mknod_uio_dev(dstbuf, uio_num) < 0)
+		RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);
+
+	return uio_num;
+}
+
+void
+pci_uio_free_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource *uio_res)
+{
+	rte_free(uio_res);
+
+	if (dev->intr_handle.uio_cfg_fd >= 0) {
+		close(dev->intr_handle.uio_cfg_fd);
+		dev->intr_handle.uio_cfg_fd = -1;
+	}
+	if (dev->intr_handle.fd) {
+		close(dev->intr_handle.fd);
+		dev->intr_handle.fd = -1;
+		dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	}
+}
+
+int
+pci_uio_alloc_resource(struct rte_pci_device *dev,
+		struct mapped_pci_resource **uio_res)
+{
+	char dirname[PATH_MAX];
+	char cfgname[PATH_MAX];
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	int uio_num;
+	struct rte_pci_addr *loc;
+
+	loc = &dev->addr;
+
+	/* find uio resource */
+	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1);
+	if (uio_num < 0) {
+		RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
+				"skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
+		return 1;
+	}
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+
+	/* save fd if in primary process */
+	dev->intr_handle.fd = open(devname, O_RDWR);
+	if (dev->intr_handle.fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			devname, strerror(errno));
+		goto error;
+	}
+
+	snprintf(cfgname, sizeof(cfgname),
+			"/sys/class/uio/uio%u/device/config", uio_num);
+	dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
+	if (dev->intr_handle.uio_cfg_fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+			cfgname, strerror(errno));
+		goto error;
+	}
+
+	if (dev->kdrv == RTE_KDRV_IGB_UIO)
+		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+	else {
+		dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
+
+		/* set bus master that is not done by uio_pci_generic */
+		if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
+			RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+			goto error;
+		}
+	}
+
+	/* allocate the mapping details for secondary processes*/
+	*uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
+	if (*uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot store uio mmap details\n", __func__);
+		goto error;
+	}
+
+	snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
+	memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
+
+	return 0;
+
+error:
+	pci_uio_free_resource(dev, *uio_res);
+	return -1;
+}
+
+int
+pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
+		struct mapped_pci_resource *uio_res, int map_idx)
+{
+	int fd;
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	void *mapaddr;
+	struct rte_pci_addr *loc;
+	struct pci_map *maps;
+
+	loc = &dev->addr;
+	maps = uio_res->maps;
+
+	/* update devname for mmap  */
+	snprintf(devname, sizeof(devname),
+			SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
+			loc->domain, loc->bus, loc->devid,
+			loc->function, res_idx);
+
+	/* allocate memory to keep path */
+	maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
+	if (maps[map_idx].path == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
+				strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * open resource file, to mmap it
+	 */
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				devname, strerror(errno));
+		goto error;
+	}
+
+	/* try mapping somewhere close to the end of hugepages */
+	if (pci_map_addr == NULL)
+		pci_map_addr = pci_find_max_end_va();
+
+	mapaddr = pci_map_resource(pci_map_addr, fd, 0,
+			(size_t)dev->mem_resource[res_idx].len, 0);
+	close(fd);
+	if (mapaddr == MAP_FAILED)
+		goto error;
+
+	pci_map_addr = RTE_PTR_ADD(mapaddr,
+			(size_t)dev->mem_resource[res_idx].len);
+
+	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
+	maps[map_idx].size = dev->mem_resource[res_idx].len;
+	maps[map_idx].addr = mapaddr;
+	maps[map_idx].offset = 0;
+	strcpy(maps[map_idx].path, devname);
+	dev->mem_resource[res_idx].addr = mapaddr;
+
+	return 0;
+
+error:
+	rte_free(maps[map_idx].path);
+	return -1;
+}
+
+int
+pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
+		   struct rte_pci_ioport *p)
+{
+#if defined(RTE_ARCH_X86)
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	int uio_num;
+	unsigned long start;
+
+	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
+	if (uio_num < 0)
+		return -1;
+
+	/* get portio start */
+	snprintf(filename, sizeof(filename),
+		 "%s/portio/port%d/start", dirname, bar);
+	if (eal_parse_sysfs_value(filename, &start) < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
+			__func__);
+		return -1;
+	}
+	/* ensure we don't get anything funny here, read/write will cast to
+	 * uin16_t */
+	if (start > UINT16_MAX)
+		return -1;
+
+	/* FIXME only for primary process ? */
+	if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
+
+		snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
+		dev->intr_handle.fd = open(filename, O_RDWR);
+		if (dev->intr_handle.fd < 0) {
+			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				filename, strerror(errno));
+			return -1;
+		}
+		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
+	}
+
+	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
+
+	p->base = start;
+	return 0;
+#else
+	RTE_SET_USED(dev);
+	RTE_SET_USED(bar);
+	RTE_SET_USED(p);
+	return -1;
+#endif
+}
+
+void
+pci_uio_ioport_read(struct rte_pci_ioport *p,
+		    void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+	uint8_t *d;
+	int size;
+	unsigned short reg = p->base + offset;
+
+	for (d = data; len > 0; d += size, reg += size, len -= size) {
+		if (len >= 4) {
+			size = 4;
+			*(uint32_t *)d = inl(reg);
+		} else if (len >= 2) {
+			size = 2;
+			*(uint16_t *)d = inw(reg);
+		} else {
+			size = 1;
+			*d = inb(reg);
+		}
+	}
+#else
+	RTE_SET_USED(p);
+	RTE_SET_USED(data);
+	RTE_SET_USED(len);
+	RTE_SET_USED(offset);
+#endif
+}
+
+void
+pci_uio_ioport_write(struct rte_pci_ioport *p,
+		     const void *data, size_t len, off_t offset)
+{
+#if defined(RTE_ARCH_X86)
+	const uint8_t *s;
+	int size;
+	unsigned short reg = p->base + offset;
+
+	for (s = data; len > 0; s += size, reg += size, len -= size) {
+		if (len >= 4) {
+			size = 4;
+			outl_p(*(const uint32_t *)s, reg);
+		} else if (len >= 2) {
+			size = 2;
+			outw_p(*(const uint16_t *)s, reg);
+		} else {
+			size = 1;
+			outb_p(*s, reg);
+		}
+	}
+#else
+	RTE_SET_USED(p);
+	RTE_SET_USED(data);
+	RTE_SET_USED(len);
+	RTE_SET_USED(offset);
+#endif
+}
+
+int
+pci_uio_ioport_unmap(struct rte_pci_ioport *p)
+{
+	RTE_SET_USED(p);
+#if defined(RTE_ARCH_X86)
+	/* FIXME close intr fd ? */
+	return 0;
+#else
+	return -1;
+#endif
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
new file mode 100644
index 00000000..10266f8f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -0,0 +1,1097 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <linux/pci_regs.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <eal_private.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+#include "eal_vfio.h"
+
+/**
+ * @file
+ * PCI probing under linux (VFIO version)
+ *
+ * This code tries to determine if the PCI device is bound to VFIO driver,
+ * and initialize it (map BARs, set up interrupts) if that's the case.
+ *
+ * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
+ */
+
+#ifdef VFIO_PRESENT
+
+#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
+#define PAGE_MASK   (~(PAGE_SIZE - 1))
+
+static struct rte_tailq_elem rte_vfio_tailq = {
+	.name = "VFIO_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(rte_vfio_tailq)
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define VFIO_GET_REGION_IDX(x) (x >> 40)
+
+/* per-process VFIO config */
+static struct vfio_config vfio_cfg;
+
+/* DMA mapping function prototype.
+ * Takes VFIO container fd as a parameter.
+ * Returns 0 on success, -1 on error.
+ * */
+typedef int (*vfio_dma_func_t)(int);
+
+struct vfio_iommu_type {
+	int type_id;
+	const char *name;
+	vfio_dma_func_t dma_map_func;
+};
+
+static int vfio_type1_dma_map(int);
+static int vfio_noiommu_dma_map(int);
+
+/* IOMMU types we support */
+static const struct vfio_iommu_type iommu_types[] = {
+	/* x86 IOMMU, otherwise known as type 1 */
+	{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+	/* IOMMU-less mode */
+	{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+};
+
+int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	int i, ret;
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		struct vfio_iommu_type1_dma_map dma_map;
+
+		if (ms[i].addr == NULL)
+			break;
+
+		memset(&dma_map, 0, sizeof(dma_map));
+		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+		dma_map.vaddr = ms[i].addr_64;
+		dma_map.size = ms[i].len;
+		dma_map.iova = ms[i].phys_addr;
+		dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
+					"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int
+vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+	/* No-IOMMU mode does not need DMA mapping */
+	return 0;
+}
+
+int
+pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
+		    void *buf, size_t len, off_t offs)
+{
+	return pread64(intr_handle->vfio_dev_fd, buf, len,
+	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+}
+
+int
+pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
+		    const void *buf, size_t len, off_t offs)
+{
+	return pwrite64(intr_handle->vfio_dev_fd, buf, len,
+	       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
+}
+
+/* get PCI BAR number where MSI-X interrupts are */
+static int
+pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
+		      uint32_t *msix_table_size)
+{
+	int ret;
+	uint32_t reg;
+	uint16_t flags;
+	uint8_t cap_id, cap_offset;
+
+	/* read PCI capability pointer from config space */
+	ret = pread64(fd, &reg, sizeof(reg),
+			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+			PCI_CAPABILITY_LIST);
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
+				"config space!\n");
+		return -1;
+	}
+
+	/* we need first byte */
+	cap_offset = reg & 0xFF;
+
+	while (cap_offset) {
+
+		/* read PCI capability ID */
+		ret = pread64(fd, &reg, sizeof(reg),
+				VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+				cap_offset);
+		if (ret != sizeof(reg)) {
+			RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
+					"config space!\n");
+			return -1;
+		}
+
+		/* we need first byte */
+		cap_id = reg & 0xFF;
+
+		/* if we haven't reached MSI-X, check next capability */
+		if (cap_id != PCI_CAP_ID_MSIX) {
+			ret = pread64(fd, &reg, sizeof(reg),
+					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+					cap_offset);
+			if (ret != sizeof(reg)) {
+				RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
+						"config space!\n");
+				return -1;
+			}
+
+			/* we need second byte */
+			cap_offset = (reg & 0xFF00) >> 8;
+
+			continue;
+		}
+		/* else, read table offset */
+		else {
+			/* table offset resides in the next 4 bytes */
+			ret = pread64(fd, &reg, sizeof(reg),
+					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+					cap_offset + 4);
+			if (ret != sizeof(reg)) {
+				RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
+						"space!\n");
+				return -1;
+			}
+
+			ret = pread64(fd, &flags, sizeof(flags),
+					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+					cap_offset + 2);
+			if (ret != sizeof(flags)) {
+				RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
+						"space!\n");
+				return -1;
+			}
+
+			*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
+			*msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+			*msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
+
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/* set PCI bus mastering */
+static int
+pci_vfio_set_bus_master(int dev_fd)
+{
+	uint16_t reg;
+	int ret;
+
+	ret = pread64(dev_fd, &reg, sizeof(reg),
+			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+			PCI_COMMAND);
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
+		return -1;
+	}
+
+	/* set the master bit */
+	reg |= PCI_COMMAND_MASTER;
+
+	ret = pwrite64(dev_fd, &reg, sizeof(reg),
+			VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+			PCI_COMMAND);
+
+	if (ret != sizeof(reg)) {
+		RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+static const struct vfio_iommu_type *
+pci_vfio_set_iommu_type(int vfio_container_fd) {
+	unsigned idx;
+	for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+		const struct vfio_iommu_type *t = &iommu_types[idx];
+
+		int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+				t->type_id);
+		if (!ret) {
+			RTE_LOG(NOTICE, EAL, "  using IOMMU type %d (%s)\n",
+					t->type_id, t->name);
+			return t;
+		}
+		/* not an error, there may be more supported IOMMU types */
+		RTE_LOG(DEBUG, EAL, "  set IOMMU type %d (%s) failed, "
+				"error %i (%s)\n", t->type_id, t->name, errno,
+				strerror(errno));
+	}
+	/* if we didn't find a suitable IOMMU type, fail */
+	return NULL;
+}
+
+/* check if we have any supported extensions */
+static int
+pci_vfio_has_supported_extensions(int vfio_container_fd) {
+	int ret;
+	unsigned idx, n_extensions = 0;
+	for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+		const struct vfio_iommu_type *t = &iommu_types[idx];
+
+		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
+				t->type_id);
+		if (ret < 0) {
+			RTE_LOG(ERR, EAL, "  could not get IOMMU type, "
+				"error %i (%s)\n", errno,
+				strerror(errno));
+			close(vfio_container_fd);
+			return -1;
+		} else if (ret == 1) {
+			/* we found a supported extension */
+			n_extensions++;
+		}
+		RTE_LOG(DEBUG, EAL, "  IOMMU type %d (%s) is %s\n",
+				t->type_id, t->name,
+				ret ? "supported" : "not supported");
+	}
+
+	/* if we didn't find any supported IOMMU types, fail */
+	if (!n_extensions) {
+		close(vfio_container_fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* set up interrupt support (but not enable interrupts) */
+static int
+pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+	int i, ret, intr_idx;
+
+	/* default to invalid index */
+	intr_idx = VFIO_PCI_NUM_IRQS;
+
+	/* get interrupt type from internal config (MSI-X by default, can be
+	 * overriden from the command line
+	 */
+	switch (internal_config.vfio_intr_mode) {
+	case RTE_INTR_MODE_MSIX:
+		intr_idx = VFIO_PCI_MSIX_IRQ_INDEX;
+		break;
+	case RTE_INTR_MODE_MSI:
+		intr_idx = VFIO_PCI_MSI_IRQ_INDEX;
+		break;
+	case RTE_INTR_MODE_LEGACY:
+		intr_idx = VFIO_PCI_INTX_IRQ_INDEX;
+		break;
+	/* don't do anything if we want to automatically determine interrupt type */
+	case RTE_INTR_MODE_NONE:
+		break;
+	default:
+		RTE_LOG(ERR, EAL, "  unknown default interrupt type!\n");
+		return -1;
+	}
+
+	/* start from MSI-X interrupt type */
+	for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) {
+		struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+		int fd = -1;
+
+		/* skip interrupt modes we don't want */
+		if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE &&
+				i != intr_idx)
+			continue;
+
+		irq.index = i;
+
+		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+		if (ret < 0) {
+			RTE_LOG(ERR, EAL, "  cannot get IRQ info, "
+					"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+		/* if this vector cannot be used with eventfd, fail if we explicitly
+		 * specified interrupt type, otherwise continue */
+		if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
+			if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) {
+				RTE_LOG(ERR, EAL,
+						"  interrupt vector does not support eventfd!\n");
+				return -1;
+			} else
+				continue;
+		}
+
+		/* set up an eventfd for interrupts */
+		fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "  cannot set up eventfd, "
+					"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+		dev->intr_handle.fd = fd;
+		dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+
+		switch (i) {
+		case VFIO_PCI_MSIX_IRQ_INDEX:
+			internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
+			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
+			break;
+		case VFIO_PCI_MSI_IRQ_INDEX:
+			internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
+			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI;
+			break;
+		case VFIO_PCI_INTX_IRQ_INDEX:
+			internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY;
+			dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY;
+			break;
+		default:
+			RTE_LOG(ERR, EAL, "  unknown interrupt type!\n");
+			return -1;
+		}
+
+		return 0;
+	}
+
+	/* if we're here, we haven't found a suitable interrupt vector */
+	return -1;
+}
+
+/* open container fd or get an existing one */
+int
+pci_vfio_get_container_fd(void)
+{
+	int ret, vfio_container_fd;
+
+	/* if we're in a primary process, try to open the container */
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+		vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
+		if (vfio_container_fd < 0) {
+			RTE_LOG(ERR, EAL, "  cannot open VFIO container, "
+					"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+		/* check VFIO API version */
+		ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
+		if (ret != VFIO_API_VERSION) {
+			if (ret < 0)
+				RTE_LOG(ERR, EAL, "  could not get VFIO API version, "
+						"error %i (%s)\n", errno, strerror(errno));
+			else
+				RTE_LOG(ERR, EAL, "  unsupported VFIO API version!\n");
+			close(vfio_container_fd);
+			return -1;
+		}
+
+		ret = pci_vfio_has_supported_extensions(vfio_container_fd);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  no supported IOMMU "
+					"extensions found!\n");
+			return -1;
+		}
+
+		return vfio_container_fd;
+	} else {
+		/*
+		 * if we're in a secondary process, request container fd from the
+		 * primary process via our socket
+		 */
+		int socket_fd;
+
+		socket_fd = vfio_mp_sync_connect_to_primary();
+		if (socket_fd < 0) {
+			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
+			return -1;
+		}
+		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
+			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
+			close(socket_fd);
+			return -1;
+		}
+		vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
+		if (vfio_container_fd < 0) {
+			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
+			close(socket_fd);
+			return -1;
+		}
+		close(socket_fd);
+		return vfio_container_fd;
+	}
+
+	return -1;
+}
+
+/* open group fd or get an existing one */
+int
+pci_vfio_get_group_fd(int iommu_group_no)
+{
+	int i;
+	int vfio_group_fd;
+	char filename[PATH_MAX];
+
+	/* check if we already have the group descriptor open */
+	for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
+		if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
+			return vfio_cfg.vfio_groups[i].fd;
+
+	/* if primary, try to open the group */
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+		/* try regular group format */
+		snprintf(filename, sizeof(filename),
+				 VFIO_GROUP_FMT, iommu_group_no);
+		vfio_group_fd = open(filename, O_RDWR);
+		if (vfio_group_fd < 0) {
+			/* if file not found, it's not an error */
+			if (errno != ENOENT) {
+				RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+						strerror(errno));
+				return -1;
+			}
+
+			/* special case: try no-IOMMU path as well */
+			snprintf(filename, sizeof(filename),
+					VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+			vfio_group_fd = open(filename, O_RDWR);
+			if (vfio_group_fd < 0) {
+				if (errno != ENOENT) {
+					RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+							strerror(errno));
+					return -1;
+				}
+				return 0;
+			}
+			/* noiommu group found */
+		}
+
+		/* if the fd is valid, create a new group for it */
+		if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
+			RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+			close(vfio_group_fd);
+			return -1;
+		}
+		vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
+		vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+		return vfio_group_fd;
+	}
+	/* if we're in a secondary process, request group fd from the primary
+	 * process via our socket
+	 */
+	else {
+		int socket_fd, ret;
+
+		socket_fd = vfio_mp_sync_connect_to_primary();
+
+		if (socket_fd < 0) {
+			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
+			return -1;
+		}
+		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
+			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
+			close(socket_fd);
+			return -1;
+		}
+		if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
+			RTE_LOG(ERR, EAL, "  cannot send group number!\n");
+			close(socket_fd);
+			return -1;
+		}
+		ret = vfio_mp_sync_receive_request(socket_fd);
+		switch (ret) {
+		case SOCKET_NO_FD:
+			close(socket_fd);
+			return 0;
+		case SOCKET_OK:
+			vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
+			/* if we got the fd, return it */
+			if (vfio_group_fd > 0) {
+				close(socket_fd);
+				return vfio_group_fd;
+			}
+			/* fall-through on error */
+		default:
+			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
+			close(socket_fd);
+			return -1;
+		}
+	}
+	return -1;
+}
+
+/* parse IOMMU group number for a PCI device
+ * returns 1 on success, -1 for errors, 0 for non-existent group
+ */
+static int
+pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no)
+{
+	char linkname[PATH_MAX];
+	char filename[PATH_MAX];
+	char *tok[16], *group_tok, *end;
+	int ret;
+
+	memset(linkname, 0, sizeof(linkname));
+	memset(filename, 0, sizeof(filename));
+
+	/* try to find out IOMMU group for this device */
+	snprintf(linkname, sizeof(linkname),
+			 SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr);
+
+	ret = readlink(linkname, filename, sizeof(filename));
+
+	/* if the link doesn't exist, no VFIO for us */
+	if (ret < 0)
+		return 0;
+
+	ret = rte_strsplit(filename, sizeof(filename),
+			tok, RTE_DIM(tok), '/');
+
+	if (ret <= 0) {
+		RTE_LOG(ERR, EAL, "  %s cannot get IOMMU group\n", pci_addr);
+		return -1;
+	}
+
+	/* IOMMU group is always the last token */
+	errno = 0;
+	group_tok = tok[ret - 1];
+	end = group_tok;
+	*iommu_group_no = strtol(group_tok, &end, 10);
+	if ((end != group_tok && *end != '\0') || errno != 0) {
+		RTE_LOG(ERR, EAL, "  %s error parsing IOMMU number!\n", pci_addr);
+		return -1;
+	}
+
+	return 1;
+}
+
+static void
+clear_current_group(void)
+{
+	vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0;
+	vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
+}
+
+
+/*
+ * map the PCI resources of a PCI device in virtual memory (VFIO version).
+ * primary and secondary processes follow almost exactly the same path
+ */
+int
+pci_vfio_map_resource(struct rte_pci_device *dev)
+{
+	struct vfio_group_status group_status = {
+			.argsz = sizeof(group_status)
+	};
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	int vfio_group_fd, vfio_dev_fd;
+	int iommu_group_no;
+	char pci_addr[PATH_MAX] = {0};
+	struct rte_pci_addr *loc = &dev->addr;
+	int i, ret, msix_bar;
+	struct mapped_pci_resource *vfio_res = NULL;
+	struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+
+	struct pci_map *maps;
+	uint32_t msix_table_offset = 0;
+	uint32_t msix_table_size = 0;
+	uint32_t ioport_bar;
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* store PCI address string */
+	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+			loc->domain, loc->bus, loc->devid, loc->function);
+
+	/* get group number */
+	ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no);
+	if (ret == 0) {
+		RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver, skipping\n",
+			pci_addr);
+		return 1;
+	}
+
+	/* if negative, something failed */
+	if (ret < 0)
+		return -1;
+
+	/* get the actual group fd */
+	vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no);
+	if (vfio_group_fd < 0)
+		return -1;
+
+	/* store group fd */
+	vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
+	vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+
+	/* if group_fd == 0, that means the device isn't managed by VFIO */
+	if (vfio_group_fd == 0) {
+		RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver, skipping\n",
+				pci_addr);
+		/* we store 0 as group fd to distinguish between existing but
+		 * unbound VFIO groups, and groups that don't exist at all.
+		 */
+		vfio_cfg.vfio_group_idx++;
+		return 1;
+	}
+
+	/*
+	 * at this point, we know at least one port on this device is bound to VFIO,
+	 * so we can proceed to try and set this particular port up
+	 */
+
+	/* check if the group is viable */
+	ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "  %s cannot get group status, "
+				"error %i (%s)\n", pci_addr, errno, strerror(errno));
+		close(vfio_group_fd);
+		clear_current_group();
+		return -1;
+	} else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+		RTE_LOG(ERR, EAL, "  %s VFIO group is not viable!\n", pci_addr);
+		close(vfio_group_fd);
+		clear_current_group();
+		return -1;
+	}
+
+	/*
+	 * at this point, we know that this group is viable (meaning, all devices
+	 * are either bound to VFIO or not bound to anything)
+	 */
+
+	/* check if group does not have a container yet */
+	if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+
+		/* add group to a container */
+		ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+				&vfio_cfg.vfio_container_fd);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  %s cannot add VFIO group to container, "
+					"error %i (%s)\n", pci_addr, errno, strerror(errno));
+			close(vfio_group_fd);
+			clear_current_group();
+			return -1;
+		}
+		/*
+		 * at this point we know that this group has been successfully
+		 * initialized, so we increment vfio_group_idx to indicate that we can
+		 * add new groups.
+		 */
+		vfio_cfg.vfio_group_idx++;
+	}
+
+	/*
+	 * pick an IOMMU type and set up DMA mappings for container
+	 *
+	 * needs to be done only once, only when at least one group is assigned to
+	 * a container and only in primary process
+	 */
+	if (internal_config.process_type == RTE_PROC_PRIMARY &&
+			vfio_cfg.vfio_container_has_dma == 0) {
+		/* select an IOMMU type which we will be using */
+		const struct vfio_iommu_type *t =
+				pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+		if (!t) {
+			RTE_LOG(ERR, EAL, "  %s failed to select IOMMU type\n", pci_addr);
+			return -1;
+		}
+		ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  %s DMA remapping failed, "
+					"error %i (%s)\n", pci_addr, errno, strerror(errno));
+			return -1;
+		}
+		vfio_cfg.vfio_container_has_dma = 1;
+	}
+
+	/* get a file descriptor for the device */
+	vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr);
+	if (vfio_dev_fd < 0) {
+		/* if we cannot get a device fd, this simply means that this
+		 * particular port is not bound to VFIO
+		 */
+		RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver, skipping\n",
+				pci_addr);
+		return 1;
+	}
+
+	/* test and setup the device */
+	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "  %s cannot get device info, "
+				"error %i (%s)\n", pci_addr, errno, strerror(errno));
+		close(vfio_dev_fd);
+		return -1;
+	}
+
+	/* get MSI-X BAR, if any (we have to know where it is because we can't
+	 * easily mmap it when using VFIO) */
+	msix_bar = -1;
+	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
+				    &msix_table_offset, &msix_table_size);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n", pci_addr);
+		close(vfio_dev_fd);
+		return -1;
+	}
+
+	/* if we're in a primary process, allocate vfio_res and get region info */
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+		vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
+		if (vfio_res == NULL) {
+			RTE_LOG(ERR, EAL,
+				"%s(): cannot store uio mmap details\n", __func__);
+			close(vfio_dev_fd);
+			return -1;
+		}
+		memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
+
+		/* get number of registers (up to BAR5) */
+		vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
+				VFIO_PCI_BAR5_REGION_INDEX + 1);
+	} else {
+		/* if we're in a secondary process, just find our tailq entry */
+		TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+			if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+				continue;
+			break;
+		}
+		/* if we haven't found our tailq entry, something's wrong */
+		if (vfio_res == NULL) {
+			RTE_LOG(ERR, EAL, "  %s cannot find TAILQ entry for PCI device!\n",
+					pci_addr);
+			close(vfio_dev_fd);
+			return -1;
+		}
+	}
+
+	/* map BARs */
+	maps = vfio_res->maps;
+
+	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+		struct vfio_region_info reg = { .argsz = sizeof(reg) };
+		void *bar_addr;
+		struct memreg {
+			unsigned long offset, size;
+		} memreg[2] = {};
+
+		reg.index = i;
+
+		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
+
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  %s cannot get device region info "
+					"error %i (%s)\n", pci_addr, errno, strerror(errno));
+			close(vfio_dev_fd);
+			if (internal_config.process_type == RTE_PROC_PRIMARY)
+				rte_free(vfio_res);
+			return -1;
+		}
+
+		/* chk for io port region */
+		ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+			      VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+			      + PCI_BASE_ADDRESS_0 + i*4);
+
+		if (ret != sizeof(ioport_bar)) {
+			RTE_LOG(ERR, EAL,
+				"Cannot read command (%x) from config space!\n",
+				PCI_BASE_ADDRESS_0 + i*4);
+			return -1;
+		}
+
+		if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
+			RTE_LOG(INFO, EAL,
+				"Ignore mapping IO port bar(%d) addr: %x\n",
+				 i, ioport_bar);
+			continue;
+		}
+
+		/* skip non-mmapable BARs */
+		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+			continue;
+
+		if (i == msix_bar) {
+			/*
+			 * VFIO will not let us map the MSI-X table,
+			 * but we can map around it.
+			 */
+			uint32_t table_start = msix_table_offset;
+			uint32_t table_end = table_start + msix_table_size;
+			table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
+			table_start &= PAGE_MASK;
+
+			if (table_start == 0 && table_end >= reg.size) {
+				/* Cannot map this BAR */
+				RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
+				continue;
+			} else {
+				memreg[0].offset = reg.offset;
+				memreg[0].size = table_start;
+				memreg[1].offset = table_end;
+				memreg[1].size = reg.size - table_end;
+
+				RTE_LOG(DEBUG, EAL,
+					"Trying to map BAR %d that contains the MSI-X "
+					"table. Trying offsets: "
+					"0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i,
+					memreg[0].offset, memreg[0].size,
+					memreg[1].offset, memreg[1].size);
+			}
+		} else {
+			memreg[0].offset = reg.offset;
+			memreg[0].size = reg.size;
+		}
+
+		/* try to figure out an address */
+		if (internal_config.process_type == RTE_PROC_PRIMARY) {
+			/* try mapping somewhere close to the end of hugepages */
+			if (pci_map_addr == NULL)
+				pci_map_addr = pci_find_max_end_va();
+
+			bar_addr = pci_map_addr;
+			pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+		} else {
+			bar_addr = maps[i].addr;
+		}
+
+		/* reserve the address using an inaccessible mapping */
+		bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
+				MAP_ANONYMOUS, -1, 0);
+		if (bar_addr != MAP_FAILED) {
+			void *map_addr = NULL;
+			if (memreg[0].size) {
+				/* actual map of first part */
+				map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+							    memreg[0].offset,
+							    memreg[0].size,
+							    MAP_FIXED);
+			}
+
+			/* if there's a second part, try to map it */
+			if (map_addr != MAP_FAILED
+			    && memreg[1].offset && memreg[1].size) {
+				void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset);
+				map_addr = pci_map_resource(second_addr,
+							    vfio_dev_fd, memreg[1].offset,
+							    memreg[1].size,
+							    MAP_FIXED);
+			}
+
+			if (map_addr == MAP_FAILED || !map_addr) {
+				munmap(bar_addr, reg.size);
+				bar_addr = MAP_FAILED;
+			}
+		}
+
+		if (bar_addr == MAP_FAILED ||
+				(internal_config.process_type == RTE_PROC_SECONDARY &&
+						bar_addr != maps[i].addr)) {
+			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", pci_addr, i,
+					strerror(errno));
+			close(vfio_dev_fd);
+			if (internal_config.process_type == RTE_PROC_PRIMARY)
+				rte_free(vfio_res);
+			return -1;
+		}
+
+		maps[i].addr = bar_addr;
+		maps[i].offset = reg.offset;
+		maps[i].size = reg.size;
+		maps[i].path = NULL; /* vfio doesn't have per-resource paths */
+		dev->mem_resource[i].addr = bar_addr;
+	}
+
+	/* if secondary process, do not set up interrupts */
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+		if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
+			RTE_LOG(ERR, EAL, "  %s error setting up interrupts!\n", pci_addr);
+			close(vfio_dev_fd);
+			rte_free(vfio_res);
+			return -1;
+		}
+
+		/* set bus mastering for the device */
+		if (pci_vfio_set_bus_master(vfio_dev_fd)) {
+			RTE_LOG(ERR, EAL, "  %s cannot set up bus mastering!\n", pci_addr);
+			close(vfio_dev_fd);
+			rte_free(vfio_res);
+			return -1;
+		}
+
+		/* Reset the device */
+		ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+	}
+
+	if (internal_config.process_type == RTE_PROC_PRIMARY)
+		TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
+
+	return 0;
+}
+
+int
+pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
+		    struct rte_pci_ioport *p)
+{
+	if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
+	    bar > VFIO_PCI_BAR5_REGION_INDEX) {
+		RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
+		return -1;
+	}
+
+	p->dev = dev;
+	p->base = VFIO_GET_REGION_ADDR(bar);
+	return 0;
+}
+
+void
+pci_vfio_ioport_read(struct rte_pci_ioport *p,
+		     void *data, size_t len, off_t offset)
+{
+	const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
+
+	if (pread64(intr_handle->vfio_dev_fd, data,
+		    len, p->base + offset) <= 0)
+		RTE_LOG(ERR, EAL,
+			"Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n",
+			VFIO_GET_REGION_IDX(p->base), (int)offset);
+}
+
+void
+pci_vfio_ioport_write(struct rte_pci_ioport *p,
+		      const void *data, size_t len, off_t offset)
+{
+	const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
+
+	if (pwrite64(intr_handle->vfio_dev_fd, data,
+		     len, p->base + offset) <= 0)
+		RTE_LOG(ERR, EAL,
+			"Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n",
+			VFIO_GET_REGION_IDX(p->base), (int)offset);
+}
+
+int
+pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
+{
+	RTE_SET_USED(p);
+	return -1;
+}
+
+int
+pci_vfio_enable(void)
+{
+	/* initialize group list */
+	int i;
+	int vfio_available;
+
+	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
+		vfio_cfg.vfio_groups[i].fd = -1;
+		vfio_cfg.vfio_groups[i].group_no = -1;
+	}
+
+	/* inform the user that we are probing for VFIO */
+	RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
+
+	/* check if vfio-pci module is loaded */
+	vfio_available = rte_eal_check_module("vfio_pci");
+
+	/* return error directly */
+	if (vfio_available == -1) {
+		RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
+		return -1;
+	}
+
+	/* return 0 if VFIO modules not loaded */
+	if (vfio_available == 0) {
+		RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
+			"skipping VFIO support...\n");
+		return 0;
+	}
+
+	vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd();
+
+	/* check if we have VFIO driver enabled */
+	if (vfio_cfg.vfio_container_fd != -1) {
+		RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
+		vfio_cfg.vfio_enabled = 1;
+	} else {
+		RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
+	}
+
+	return 0;
+}
+
+int
+pci_vfio_is_enabled(void)
+{
+	return vfio_cfg.vfio_enabled;
+}
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
new file mode 100644
index 00000000..d9188fde
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
@@ -0,0 +1,405 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
+#ifdef __USE_MISC
+#define REMOVED_USE_MISC
+#undef __USE_MISC
+#endif
+#include <sys/un.h>
+/* make sure we redefine __USE_MISC only if it was previously undefined */
+#ifdef REMOVED_USE_MISC
+#define __USE_MISC
+#undef REMOVED_USE_MISC
+#endif
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+#include "eal_thread.h"
+
+/**
+ * @file
+ * VFIO socket for communication between primary and secondary processes.
+ *
+ * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
+ */
+
+#ifdef VFIO_PRESENT
+
+#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
+#define CMSGLEN (CMSG_LEN(sizeof(int)))
+#define FD_TO_CMSGHDR(fd, chdr) \
+		do {\
+			(chdr).cmsg_len = CMSGLEN;\
+			(chdr).cmsg_level = SOL_SOCKET;\
+			(chdr).cmsg_type = SCM_RIGHTS;\
+			memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
+		} while (0)
+#define CMSGHDR_TO_FD(chdr, fd) \
+			memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
+
+static pthread_t socket_thread;
+static int mp_socket_fd;
+
+
+/* get socket path (/var/run if root, $HOME otherwise) */
+static void
+get_socket_path(char *buffer, int bufsz)
+{
+	const char *dir = "/var/run";
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		dir = home_dir;
+
+	/* use current prefix as file path */
+	snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
+			internal_config.hugefile_prefix);
+}
+
+
+
+/*
+ * data flow for socket comm protocol:
+ * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP
+ * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
+ * 2. server receives message
+ * 2a. in case of invalid group, SOCKET_ERR is sent back to client
+ * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
+ * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
+ *
+ * in case of any error, socket is closed.
+ */
+
+/* send a request, return -1 on error */
+int
+vfio_mp_sync_send_request(int socket, int req)
+{
+	struct msghdr hdr;
+	struct iovec iov;
+	int buf;
+	int ret;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	buf = req;
+
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+	iov.iov_base = (char *) &buf;
+	iov.iov_len = sizeof(buf);
+
+	ret = sendmsg(socket, &hdr, 0);
+	if (ret < 0)
+		return -1;
+	return 0;
+}
+
+/* receive a request and return it */
+int
+vfio_mp_sync_receive_request(int socket)
+{
+	int buf;
+	struct msghdr hdr;
+	struct iovec iov;
+	int ret, req;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	buf = SOCKET_ERR;
+
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+	iov.iov_base = (char *) &buf;
+	iov.iov_len = sizeof(buf);
+
+	ret = recvmsg(socket, &hdr, 0);
+	if (ret < 0)
+		return -1;
+
+	req = buf;
+
+	return req;
+}
+
+/* send OK in message, fd in control message */
+int
+vfio_mp_sync_send_fd(int socket, int fd)
+{
+	int buf;
+	struct msghdr hdr;
+	struct cmsghdr *chdr;
+	char chdr_buf[CMSGLEN];
+	struct iovec iov;
+	int ret;
+
+	chdr = (struct cmsghdr *) chdr_buf;
+	memset(chdr, 0, sizeof(chdr_buf));
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+	iov.iov_base = (char *) &buf;
+	iov.iov_len = sizeof(buf);
+	hdr.msg_control = chdr;
+	hdr.msg_controllen = CMSGLEN;
+
+	buf = SOCKET_OK;
+	FD_TO_CMSGHDR(fd, *chdr);
+
+	ret = sendmsg(socket, &hdr, 0);
+	if (ret < 0)
+		return -1;
+	return 0;
+}
+
+/* receive OK in message, fd in control message */
+int
+vfio_mp_sync_receive_fd(int socket)
+{
+	int buf;
+	struct msghdr hdr;
+	struct cmsghdr *chdr;
+	char chdr_buf[CMSGLEN];
+	struct iovec iov;
+	int ret, req, fd;
+
+	buf = SOCKET_ERR;
+
+	chdr = (struct cmsghdr *) chdr_buf;
+	memset(chdr, 0, sizeof(chdr_buf));
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+	iov.iov_base = (char *) &buf;
+	iov.iov_len = sizeof(buf);
+	hdr.msg_control = chdr;
+	hdr.msg_controllen = CMSGLEN;
+
+	ret = recvmsg(socket, &hdr, 0);
+	if (ret < 0)
+		return -1;
+
+	req = buf;
+
+	if (req != SOCKET_OK)
+		return -1;
+
+	CMSGHDR_TO_FD(*chdr, fd);
+
+	return fd;
+}
+
+/* connect socket_fd in secondary process to the primary process's socket */
+int
+vfio_mp_sync_connect_to_primary(void)
+{
+	struct sockaddr_un addr;
+	socklen_t sockaddr_len;
+	int socket_fd;
+
+	/* set up a socket */
+	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (socket_fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+		return -1;
+	}
+
+	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+	addr.sun_family = AF_UNIX;
+
+	sockaddr_len = sizeof(struct sockaddr_un);
+
+	if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
+		return socket_fd;
+
+	/* if connect failed */
+	close(socket_fd);
+	return -1;
+}
+
+
+
+/*
+ * socket listening thread for primary process
+ */
+static __attribute__((noreturn)) void *
+pci_vfio_mp_sync_thread(void __rte_unused * arg)
+{
+	int ret, fd, vfio_group_no;
+
+	/* wait for requests on the socket */
+	for (;;) {
+		int conn_sock;
+		struct sockaddr_un addr;
+		socklen_t sockaddr_len = sizeof(addr);
+
+		/* this is a blocking call */
+		conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
+				&sockaddr_len);
+
+		/* just restart on error */
+		if (conn_sock == -1)
+			continue;
+
+		/* set socket to linger after close */
+		struct linger l;
+		l.l_onoff = 1;
+		l.l_linger = 60;
+		setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
+
+		ret = vfio_mp_sync_receive_request(conn_sock);
+
+		switch (ret) {
+		case SOCKET_REQ_CONTAINER:
+			fd = pci_vfio_get_container_fd();
+			if (fd < 0)
+				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+			else
+				vfio_mp_sync_send_fd(conn_sock, fd);
+			break;
+		case SOCKET_REQ_GROUP:
+			/* wait for group number */
+			vfio_group_no = vfio_mp_sync_receive_request(conn_sock);
+			if (vfio_group_no < 0) {
+				close(conn_sock);
+				continue;
+			}
+
+			fd = pci_vfio_get_group_fd(vfio_group_no);
+
+			if (fd < 0)
+				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+			/* if VFIO group exists but isn't bound to VFIO driver */
+			else if (fd == 0)
+				vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
+			/* if group exists and is bound to VFIO driver */
+			else {
+				vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
+				vfio_mp_sync_send_fd(conn_sock, fd);
+			}
+			break;
+		default:
+			vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
+			break;
+		}
+		close(conn_sock);
+	}
+}
+
+static int
+vfio_mp_sync_socket_setup(void)
+{
+	int ret, socket_fd;
+	struct sockaddr_un addr;
+	socklen_t sockaddr_len;
+
+	/* set up a socket */
+	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (socket_fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+		return -1;
+	}
+
+	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+	addr.sun_family = AF_UNIX;
+
+	sockaddr_len = sizeof(struct sockaddr_un);
+
+	unlink(addr.sun_path);
+
+	ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
+		close(socket_fd);
+		return -1;
+	}
+
+	ret = listen(socket_fd, 50);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
+		close(socket_fd);
+		return -1;
+	}
+
+	/* save the socket in local configuration */
+	mp_socket_fd = socket_fd;
+
+	return 0;
+}
+
+/*
+ * set up a local socket and tell it to listen for incoming connections
+ */
+int
+pci_vfio_mp_sync_setup(void)
+{
+	int ret;
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	if (vfio_mp_sync_socket_setup() < 0) {
+		RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
+		return -1;
+	}
+
+	ret = pthread_create(&socket_thread, NULL,
+			pci_vfio_mp_sync_thread, NULL);
+	if (ret) {
+		RTE_LOG(ERR, EAL,
+			"Failed to create thread for communication with secondary processes!\n");
+		close(mp_socket_fd);
+		return -1;
+	}
+
+	/* Set thread_name for aid in debugging. */
+	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync");
+	ret = rte_thread_setname(socket_thread, thread_name);
+	if (ret)
+		RTE_LOG(ERR, EAL,
+			"Failed to set thread name for secondary processes!\n");
+
+	return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
new file mode 100644
index 00000000..18bd8e04
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -0,0 +1,199 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/queue.h>
+#include <sys/syscall.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+	int n;
+	char c = 0;
+	int m2s = lcore_config[slave_id].pipe_master2slave[1];
+	int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+	if (lcore_config[slave_id].state != WAIT)
+		return -EBUSY;
+
+	lcore_config[slave_id].f = f;
+	lcore_config[slave_id].arg = arg;
+
+	/* send message */
+	n = 0;
+	while (n == 0 || (n < 0 && errno == EINTR))
+		n = write(m2s, &c, 1);
+	if (n < 0)
+		rte_panic("cannot write on configuration pipe\n");
+
+	/* wait ack */
+	do {
+		n = read(s2m, &c, 1);
+	} while (n < 0 && errno == EINTR);
+
+	if (n <= 0)
+		rte_panic("cannot read on configuration pipe\n");
+
+	return 0;
+}
+
+/* set affinity for current EAL thread */
+static int
+eal_thread_set_affinity(void)
+{
+	unsigned lcore_id = rte_lcore_id();
+
+	/* acquire system unique id  */
+	rte_gettid();
+
+	/* update EAL thread core affinity */
+	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* set CPU affinity */
+	if (eal_thread_set_affinity() < 0)
+		rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__attribute__((noreturn)) void *
+eal_thread_loop(__attribute__((unused)) void *arg)
+{
+	char c;
+	int n, ret;
+	unsigned lcore_id;
+	pthread_t thread_id;
+	int m2s, s2m;
+	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+	thread_id = pthread_self();
+
+	/* retrieve our lcore_id from the configuration structure */
+	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+		if (thread_id == lcore_config[lcore_id].thread_id)
+			break;
+	}
+	if (lcore_id == RTE_MAX_LCORE)
+		rte_panic("cannot retrieve lcore id\n");
+
+	m2s = lcore_config[lcore_id].pipe_master2slave[0];
+	s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+	/* set the lcore ID in per-lcore memory area */
+	RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+	/* set CPU affinity */
+	if (eal_thread_set_affinity() < 0)
+		rte_panic("cannot set affinity\n");
+
+	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+
+	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
+		lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");
+
+	/* read on our pipe to get commands */
+	while (1) {
+		void *fct_arg;
+
+		/* wait command */
+		do {
+			n = read(m2s, &c, 1);
+		} while (n < 0 && errno == EINTR);
+
+		if (n <= 0)
+			rte_panic("cannot read on configuration pipe\n");
+
+		lcore_config[lcore_id].state = RUNNING;
+
+		/* send ack */
+		n = 0;
+		while (n == 0 || (n < 0 && errno == EINTR))
+			n = write(s2m, &c, 1);
+		if (n < 0)
+			rte_panic("cannot write on configuration pipe\n");
+
+		if (lcore_config[lcore_id].f == NULL)
+			rte_panic("NULL function pointer\n");
+
+		/* call the function and store the return value */
+		fct_arg = lcore_config[lcore_id].arg;
+		ret = lcore_config[lcore_id].f(fct_arg);
+		lcore_config[lcore_id].ret = ret;
+		rte_wmb();
+		lcore_config[lcore_id].state = FINISHED;
+	}
+
+	/* never reached */
+	/* pthread_exit(NULL); */
+	/* return NULL; */
+}
+
+/* require calling thread tid by gettid() */
+int rte_sys_gettid(void)
+{
+	return (int)syscall(SYS_gettid);
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c
new file mode 100644
index 00000000..f2abb7b6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -0,0 +1,305 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2012-2013 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+enum timer_source eal_timer_source = EAL_TIMER_HPET;
+
+#ifdef RTE_LIBEAL_USE_HPET
+
+#define DEV_HPET "/dev/hpet"
+
+/* Maximum number of counters. */
+#define HPET_TIMER_NUM 3
+
+/* General capabilities register */
+#define CLK_PERIOD_SHIFT     32 /* Clock period shift. */
+#define CLK_PERIOD_MASK      0xffffffff00000000ULL /* Clock period mask. */
+
+/**
+ * HPET timer registers. From the Intel IA-PC HPET (High Precision Event
+ * Timers) Specification.
+ */
+struct eal_hpet_regs {
+	/* Memory-mapped, software visible registers */
+	uint64_t capabilities;      /**< RO General Capabilities Register. */
+	uint64_t reserved0;         /**< Reserved for future use. */
+	uint64_t config;            /**< RW General Configuration Register. */
+	uint64_t reserved1;         /**< Reserved for future use. */
+	uint64_t isr;               /**< RW Clear General Interrupt Status. */
+	uint64_t reserved2[25];     /**< Reserved for future use. */
+	union {
+		uint64_t counter;   /**< RW Main Counter Value Register. */
+		struct {
+			uint32_t counter_l; /**< RW Main Counter Low. */
+			uint32_t counter_h; /**< RW Main Counter High. */
+		};
+	};
+	uint64_t reserved3;         /**< Reserved for future use. */
+	struct {
+		uint64_t config;    /**< RW Timer Config and Capability Reg. */
+		uint64_t comp;      /**< RW Timer Comparator Value Register. */
+		uint64_t fsb;       /**< RW FSB Interrupt Route Register. */
+		uint64_t reserved4; /**< Reserved for future use. */
+	} timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */
+};
+
+/* Mmap'd hpet registers */
+static volatile struct eal_hpet_regs *eal_hpet = NULL;
+
+/* Period at which the HPET counter increments in
+ * femtoseconds (10^-15 seconds). */
+static uint32_t eal_hpet_resolution_fs = 0;
+
+/* Frequency of the HPET counter in Hz */
+static uint64_t eal_hpet_resolution_hz = 0;
+
+/* Incremented 4 times during one 32bits hpet full count */
+static uint32_t eal_hpet_msb;
+
+static pthread_t msb_inc_thread_id;
+
+/*
+ * This function runs on a specific thread to update a global variable
+ * containing used to process MSB of the HPET (unfortunatelly, we need
+ * this because hpet is 32 bits by default under linux).
+ */
+static void
+hpet_msb_inc(__attribute__((unused)) void *arg)
+{
+	uint32_t t;
+
+	while (1) {
+		t = (eal_hpet->counter_l >> 30);
+		if (t != (eal_hpet_msb & 3))
+			eal_hpet_msb ++;
+		sleep(10);
+	}
+}
+
+uint64_t
+rte_get_hpet_hz(void)
+{
+	if(internal_config.no_hpet)
+		rte_panic("Error, HPET called, but no HPET present\n");
+
+	return eal_hpet_resolution_hz;
+}
+
+uint64_t
+rte_get_hpet_cycles(void)
+{
+	uint32_t t, msb;
+	uint64_t ret;
+
+	if(internal_config.no_hpet)
+		rte_panic("Error, HPET called, but no HPET present\n");
+
+	t = eal_hpet->counter_l;
+	msb = eal_hpet_msb;
+	ret = (msb + 2 - (t >> 30)) / 4;
+	ret <<= 32;
+	ret += t;
+	return ret;
+}
+
+#endif
+
+#ifdef RTE_LIBEAL_USE_HPET
+/*
+ * Open and mmap /dev/hpet (high precision event timer) that will
+ * provide our time reference.
+ */
+int
+rte_eal_hpet_init(int make_default)
+{
+	int fd, ret;
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	if (internal_config.no_hpet) {
+		RTE_LOG(NOTICE, EAL, "HPET is disabled\n");
+		return -1;
+	}
+
+	fd = open(DEV_HPET, O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n",
+			strerror(errno));
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+	eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0);
+	if (eal_hpet == MAP_FAILED) {
+		RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n"
+				"Please enable CONFIG_HPET_MMAP in your kernel configuration "
+				"to allow HPET support.\n"
+				"To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n "
+				"in your build configuration or use '--no-hpet' EAL flag.\n");
+		close(fd);
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+	close(fd);
+
+	eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities &
+					CLK_PERIOD_MASK) >>
+					CLK_PERIOD_SHIFT);
+
+	eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) /
+		(uint64_t)eal_hpet_resolution_fs;
+
+	RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n",
+			eal_hpet_resolution_hz/1000);
+
+	eal_hpet_msb = (eal_hpet->counter_l >> 30);
+
+	/* create a thread that will increment a global variable for
+	 * msb (hpet is 32 bits by default under linux) */
+	ret = pthread_create(&msb_inc_thread_id, NULL,
+			(void *(*)(void *))hpet_msb_inc, NULL);
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n");
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+
+	/*
+	 * Set thread_name for aid in debugging.
+	 */
+	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc");
+	ret = rte_thread_setname(msb_inc_thread_id, thread_name);
+	if (ret != 0)
+		RTE_LOG(ERR, EAL,
+			"ERROR: Cannot set HPET timer thread name!\n");
+
+	if (make_default)
+		eal_timer_source = EAL_TIMER_HPET;
+	return 0;
+}
+#endif
+
+static void
+check_tsc_flags(void)
+{
+	char line[512];
+	FILE *stream;
+
+	stream = fopen("/proc/cpuinfo", "r");
+	if (!stream) {
+		RTE_LOG(WARNING, EAL, "WARNING: Unable to open /proc/cpuinfo\n");
+		return;
+	}
+
+	while (fgets(line, sizeof line, stream)) {
+		char *constant_tsc;
+		char *nonstop_tsc;
+
+		if (strncmp(line, "flags", 5) != 0)
+			continue;
+
+		constant_tsc = strstr(line, "constant_tsc");
+		nonstop_tsc = strstr(line, "nonstop_tsc");
+		if (!constant_tsc || !nonstop_tsc)
+			RTE_LOG(WARNING, EAL,
+				"WARNING: cpu flags "
+				"constant_tsc=%s "
+				"nonstop_tsc=%s "
+				"-> using unreliable clock cycles !\n",
+				constant_tsc ? "yes":"no",
+				nonstop_tsc ? "yes":"no");
+		break;
+	}
+
+	fclose(stream);
+}
+
+uint64_t
+get_tsc_freq(void)
+{
+#ifdef CLOCK_MONOTONIC_RAW
+#define NS_PER_SEC 1E9
+
+	struct timespec sleeptime = {.tv_nsec = NS_PER_SEC / 10 }; /* 1/10 second */
+
+	struct timespec t_start, t_end;
+	uint64_t tsc_hz;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
+		uint64_t ns, end, start = rte_rdtsc();
+		nanosleep(&sleeptime,NULL);
+		clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
+		end = rte_rdtsc();
+		ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC);
+		ns += (t_end.tv_nsec - t_start.tv_nsec);
+
+		double secs = (double)ns/NS_PER_SEC;
+		tsc_hz = (uint64_t)((end - start)/secs);
+		return tsc_hz;
+	}
+#endif
+	return 0;
+}
+
+int
+rte_eal_timer_init(void)
+{
+
+	eal_timer_source = EAL_TIMER_TSC;
+
+	set_tsc_freq();
+	check_tsc_flags();
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
new file mode 100644
index 00000000..f483bf40
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -0,0 +1,67 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_VFIO_H_
+#define EAL_VFIO_H_
+
+/*
+ * determine if VFIO is present on the system
+ */
+#ifdef RTE_EAL_VFIO
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#include <linux/vfio.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#define RTE_PCI_MSIX_TABLE_BIR    0x7
+#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8
+#define RTE_PCI_MSIX_FLAGS_QSIZE  0x07ff
+#else
+#define RTE_PCI_MSIX_TABLE_BIR    PCI_MSIX_TABLE_BIR
+#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET
+#define RTE_PCI_MSIX_FLAGS_QSIZE  PCI_MSIX_FLAGS_QSIZE
+#endif
+
+#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
+#define RTE_VFIO_NOIOMMU 8
+#else
+#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
+#endif
+
+#define VFIO_PRESENT
+#endif /* kernel version */
+#endif /* RTE_EAL_VFIO */
+
+#endif /* EAL_VFIO_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
new file mode 100644
index 00000000..495eef9e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
@@ -0,0 +1,369 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include <exec-env/rte_dom0_common.h>
+
+#define PAGE_SIZE RTE_PGSIZE_4K
+#define DEFAUL_DOM0_NAME "dom0-mem"
+
+static int xen_fd = -1;
+static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by mem_size until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of mem_size size.
+ */
+static void *
+xen_get_virtual_area(size_t *size, size_t mem_size)
+{
+	void *addr;
+	int fd;
+	long aligned_addr;
+
+	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd < 0){
+		RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+		return NULL;
+	}
+	do {
+		addr = mmap(NULL, (*size) + mem_size, PROT_READ,
+			MAP_PRIVATE, fd, 0);
+		if (addr == MAP_FAILED)
+			*size -= mem_size;
+	} while (addr == MAP_FAILED && *size > 0);
+
+	if (addr == MAP_FAILED) {
+		close(fd);
+		RTE_LOG(ERR, EAL, "Cannot get a virtual area\n");
+		return NULL;
+	}
+
+	munmap(addr, (*size) + mem_size);
+	close(fd);
+
+	/* align addr to a mem_size boundary */
+	aligned_addr = (uintptr_t)addr;
+	aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
+        addr = (void *)(aligned_addr);
+
+	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+		addr, *size);
+
+	return addr;
+}
+
+/**
+ * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm
+ * /memsize-mB/memsize file, and the size unit is mB.
+ */
+static int
+get_xen_memory_size(void)
+{
+	char path[PATH_MAX];
+	unsigned long mem_size = 0;
+	static const char *file_name;
+
+	file_name = "memsize";
+	snprintf(path, sizeof(path), "%s/%s",
+			sys_dir_path, file_name);
+
+	if (eal_parse_sysfs_value(path, &mem_size) < 0)
+		return -1;
+
+	if (mem_size == 0)
+		rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
+			" configured.\n",sys_dir_path, file_name);
+	if (mem_size % 2)
+		rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
+			" even number.\n",sys_dir_path, file_name);
+
+	if (mem_size > DOM0_CONFIG_MEMSIZE)
+		rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
+			" than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
+
+	return mem_size;
+}
+
+/**
+ * Based on physical address to caculate MFN in Xen Dom0.
+ */
+phys_addr_t
+rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+{
+	int mfn_id;
+	uint64_t mfn, mfn_offset;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg *memseg = mcfg->memseg;
+
+	mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
+
+	/*the MFN is contiguous in 2M */
+	mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
+					RTE_PGSIZE_2M / PAGE_SIZE;
+	mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
+
+	/** return mechine address */
+	return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE;
+}
+
+int
+rte_xen_dom0_memory_init(void)
+{
+	void *vir_addr, *vma_addr = NULL;
+	int err, ret = 0;
+	uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
+	size_t vma_len = 0;
+	struct memory_info meminfo;
+	struct memseg_info seginfo[RTE_MAX_MEMSEG];
+	int flags, page_size = getpagesize();
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg *memseg = mcfg->memseg;
+	uint64_t total_mem = internal_config.memory;
+
+	memset(seginfo, 0, sizeof(seginfo));
+	memset(&meminfo, 0, sizeof(struct memory_info));
+
+	mem_size = get_xen_memory_size();
+	requested = (unsigned) (total_mem / 0x100000);
+	if (requested > mem_size)
+		/* if we didn't satisfy total memory requirements */
+		rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
+				" available: %uMB\n", requested, mem_size);
+	else if (total_mem != 0)
+		mem_size = requested;
+
+	/* Check FD and open once */
+	if (xen_fd < 0) {
+		xen_fd = open(DOM0_MM_DEV, O_RDWR);
+		if (xen_fd < 0) {
+			RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
+			return -1;
+		}
+	}
+
+	meminfo.size = mem_size;
+
+	/* construct memory mangement name for Dom0 */
+	snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
+		internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
+
+	/* Notify kernel driver to allocate memory */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
+		err = -EIO;
+		goto fail;
+	}
+
+	/* Get number of memory segment from driver */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
+		err = -EIO;
+		goto fail;
+	}
+
+	if(num_memseg > RTE_MAX_MEMSEG){
+		RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
+			" than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
+		err = -EIO;
+		goto fail;
+	}
+
+	/* get all memory segements information */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
+		err = -EIO;
+		goto fail;
+	}
+
+	/* map all memory segments to contiguous user space */
+	for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
+	{
+		vma_len = seginfo[memseg_idx].size;
+
+		/**
+		 * get the biggest virtual memory area up to vma_len. If it fails,
+		 * vma_addr is NULL, so let the kernel provide the address.
+		 */
+		vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
+		if (vma_addr == NULL) {
+			flags = MAP_SHARED;
+			vma_len = RTE_PGSIZE_2M;
+		} else
+			flags = MAP_SHARED | MAP_FIXED;
+
+		seginfo[memseg_idx].size = vma_len;
+		vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
+			PROT_READ|PROT_WRITE, flags, xen_fd,
+			memseg_idx * page_size);
+		if (vir_addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
+				DOM0_MM_DEV);
+			err = -EIO;
+			goto fail;
+		}
+
+		memseg[memseg_idx].addr = vir_addr;
+		memseg[memseg_idx].phys_addr = page_size *
+			seginfo[memseg_idx].pfn ;
+		memseg[memseg_idx].len = seginfo[memseg_idx].size;
+		for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
+			memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
+
+		/* MFNs are continuous in 2M, so assume that page size is 2M */
+		memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
+
+		memseg[memseg_idx].nchannel = mcfg->nchannel;
+		memseg[memseg_idx].nrank = mcfg->nrank;
+
+		/* NUMA is not suppoted in Xen Dom0, so only set socket 0*/
+		memseg[memseg_idx].socket_id = 0;
+	}
+
+	return 0;
+fail:
+	if (xen_fd > 0) {
+		close(xen_fd);
+		xen_fd = -1;
+	}
+	return err;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration, mapping them in order to form a contiguous block in the
+ * virtual memory space
+ */
+int
+rte_xen_dom0_memory_attach(void)
+{
+	const struct rte_mem_config *mcfg;
+	unsigned s = 0; /* s used to track the segment number */
+	int xen_fd = -1;
+	int ret = -1;
+	void *vir_addr;
+	char name[DOM0_NAME_MAX] = {0};
+	int page_size = getpagesize();
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* Check FD and open once */
+	if (xen_fd < 0) {
+		xen_fd = open(DOM0_MM_DEV, O_RDWR);
+		if (xen_fd < 0) {
+			RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
+			goto error;
+		}
+	}
+
+	/* construct memory mangement name for Dom0 */
+	snprintf(name, DOM0_NAME_MAX, "%s-%s",
+		internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
+	/* attach to memory segments of primary process */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
+	if (ret) {
+		RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
+		goto error;
+	}
+
+	/* map all segments into memory to make sure we get the addrs */
+	for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
+
+		/*
+		 * the first memory segment with len==0 is the one that
+		 * follows the last valid segment.
+		 */
+		if (mcfg->memseg[s].len == 0)
+			break;
+
+		vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
+				PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
+				s * page_size);
+		if (vir_addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+				"in %s to requested address [%p]\n",
+				(unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
+				mcfg->memseg[s].addr);
+			goto error;
+		}
+	}
+	return 0;
+
+error:
+	if (xen_fd >= 0) {
+		close(xen_fd);
+		xen_fd = -1;
+	}
+	return -1;
+}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
new file mode 100644
index 00000000..d9707780
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
@@ -0,0 +1,108 @@
+/*-
+ *   This file is provided under a dual BSD/LGPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ *   Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2.1 of the GNU Lesser General Public License
+ *   as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_DOM0_COMMON_H_
+#define _RTE_DOM0_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+#define DOM0_NAME_MAX   256
+#define DOM0_MM_DEV   "/dev/dom0_mm"
+
+#define DOM0_CONTIG_NUM_ORDER       9       /**< order of 2M */
+#define DOM0_NUM_MEMSEG             512     /**< Maximum nb. of memory segment. */
+#define DOM0_MEMBLOCK_SIZE          0x200000 /**< size of memory block(2M). */
+#define DOM0_CONFIG_MEMSIZE         4096     /**< Maximum config memory size(4G). */
+#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
+
+#define RTE_DOM0_IOCTL_PREPARE_MEMSEG    _IOWR(0, 1 , struct memory_info)
+#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG  _IOWR(0, 2 , char *)
+#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG    _IOWR(0, 3, int)
+#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO   _IOWR(0, 4, void *)
+
+/**
+ * A structure used to store memory information.
+ */
+struct memory_info {
+	char name[DOM0_NAME_MAX];
+	uint64_t size;
+};
+
+/**
+ * A structure used to store memory segment information.
+ */
+struct memseg_info {
+	uint32_t idx;
+	uint64_t pfn;
+	uint64_t size;
+	uint64_t mfn[DOM0_NUM_MEMBLOCK];
+};
+
+/**
+ * A structure used to store memory block information.
+ */
+struct memblock_info {
+	uint8_t exchange_flag;
+	uint8_t used;
+	uint64_t vir_addr;
+	uint64_t pfn;
+	uint64_t mfn;
+};
+#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
new file mode 100644
index 00000000..3dacbff8
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -0,0 +1,228 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
+#define _RTE_LINUXAPP_INTERRUPTS_H_
+
+#define RTE_MAX_RXTX_INTR_VEC_ID     32
+#define RTE_INTR_VEC_ZERO_OFFSET      0
+#define RTE_INTR_VEC_RXTX_OFFSET      1
+
+enum rte_intr_handle_type {
+	RTE_INTR_HANDLE_UNKNOWN = 0,
+	RTE_INTR_HANDLE_UIO,          /**< uio device handle */
+	RTE_INTR_HANDLE_UIO_INTX,     /**< uio generic handle */
+	RTE_INTR_HANDLE_VFIO_LEGACY,  /**< vfio device handle (legacy) */
+	RTE_INTR_HANDLE_VFIO_MSI,     /**< vfio device handle (MSI) */
+	RTE_INTR_HANDLE_VFIO_MSIX,    /**< vfio device handle (MSIX) */
+	RTE_INTR_HANDLE_ALARM,    /**< alarm handle */
+	RTE_INTR_HANDLE_EXT, /**< external handler */
+	RTE_INTR_HANDLE_MAX
+};
+
+#define RTE_INTR_EVENT_ADD            1UL
+#define RTE_INTR_EVENT_DEL            2UL
+
+typedef void (*rte_intr_event_cb_t)(int fd, void *arg);
+
+struct rte_epoll_data {
+	uint32_t event;               /**< event type */
+	void *data;                   /**< User data */
+	rte_intr_event_cb_t cb_fun;   /**< IN: callback fun */
+	void *cb_arg;	              /**< IN: callback arg */
+};
+
+enum {
+	RTE_EPOLL_INVALID = 0,
+	RTE_EPOLL_VALID,
+	RTE_EPOLL_EXEC,
+};
+
+/** interrupt epoll event obj, taken by epoll_event.ptr */
+struct rte_epoll_event {
+	volatile uint32_t status;  /**< OUT: event status */
+	int fd;                    /**< OUT: event fd */
+	int epfd;       /**< OUT: epoll instance the ev associated with */
+	struct rte_epoll_data epdata;
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+	union {
+		int vfio_dev_fd;  /**< VFIO device file descriptor */
+		int uio_cfg_fd;  /**< UIO config file descriptor
+					for uio_pci_generic */
+	};
+	int fd;	 /**< interrupt event file descriptor */
+	enum rte_intr_handle_type type;  /**< handle type */
+	uint32_t max_intr;             /**< max interrupt requested */
+	uint32_t nb_efd;               /**< number of available efd(event fd) */
+	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
+	struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
+				       /**< intr vector epoll event */
+	int *intr_vec;                 /**< intr vector number array */
+};
+
+#define RTE_EPOLL_PER_THREAD        -1  /**< to hint using per thread epfd */
+
+/**
+ * It waits for events on the epoll instance.
+ *
+ * @param epfd
+ *   Epoll instance fd on which the caller wait for events.
+ * @param events
+ *   Memory area contains the events that will be available for the caller.
+ * @param maxevents
+ *   Up to maxevents are returned, must greater than zero.
+ * @param timeout
+ *   Specifying a timeout of -1 causes a block indefinitely.
+ *   Specifying a timeout equal to zero cause to return immediately.
+ * @return
+ *   - On success, returns the number of available event.
+ *   - On failure, a negative value.
+ */
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+	       int maxevents, int timeout);
+
+/**
+ * It performs control operations on epoll instance referred by the epfd.
+ * It requests that the operation op be performed for the target fd.
+ *
+ * @param epfd
+ *   Epoll instance fd on which the caller perform control operations.
+ * @param op
+ *   The operation be performed for the target fd.
+ * @param fd
+ *   The target fd on which the control ops perform.
+ * @param event
+ *   Describes the object linked to the fd.
+ *   Note: The caller must take care the object deletion after CTL_DEL.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+	      struct rte_epoll_event *event);
+
+/**
+ * The function returns the per thread epoll instance.
+ *
+ * @return
+ *   epfd the epoll instance referred to.
+ */
+int
+rte_intr_tls_epfd(void);
+
+/**
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {ADD, DEL}.
+ * @param vec
+ *   RX intr vector number added to the epoll instance wait list.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+		int epfd, int op, unsigned int vec, void *data);
+
+/**
+ * It enables the packet I/O interrupt event if it's necessary.
+ * It creates event fd for each interrupt vector when MSIX is used,
+ * otherwise it multiplexes a single event fd.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ * @param nb_efd
+ *   Number of interrupt vector trying to enable.
+ *   The value 0 is not allowed.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+
+/**
+ * It disables the packet I/O interrupt event.
+ * It deletes registered eventfds and closes the open fds.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+
+/**
+ * The packet I/O interrupt on datapath is enabled or not.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
+
+/**
+ * The interrupt handle instance allows other causes or not.
+ * Other causes stand for any none packet I/O interrupts.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle);
+
+/**
+ * The multiple interrupt vector capability of interrupt handle instance.
+ * It returns zero if no multiple interrupt vector support.
+ *
+ * @param intr_handle
+ *   Pointer to the interrupt handle.
+ */
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+
+#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
new file mode 100644
index 00000000..7e5e5984
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -0,0 +1,172 @@
+/*-
+ *   This file is provided under a dual BSD/LGPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ *   Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2.1 of the GNU Lesser General Public License
+ *   as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_KNI_COMMON_H_
+#define _RTE_KNI_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+/**
+ * KNI name is part of memzone name.
+ */
+#define RTE_KNI_NAMESIZE 32
+
+#define RTE_CACHE_LINE_MIN_SIZE 64
+
+/*
+ * Request id.
+ */
+enum rte_kni_req_id {
+	RTE_KNI_REQ_UNKNOWN = 0,
+	RTE_KNI_REQ_CHANGE_MTU,
+	RTE_KNI_REQ_CFG_NETWORK_IF,
+	RTE_KNI_REQ_MAX,
+};
+
+/*
+ * Structure for KNI request.
+ */
+struct rte_kni_request {
+	uint32_t req_id;             /**< Request id */
+	union {
+		uint32_t new_mtu;    /**< New MTU */
+		uint8_t if_up;       /**< 1: interface up, 0: interface down */
+	};
+	int32_t result;               /**< Result for processing request */
+} __attribute__((__packed__));
+
+/*
+ * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO
+ * Write and read should wrap around. Fifo is empty when write == read
+ * Writing should never overwrite the read position
+ */
+struct rte_kni_fifo {
+	volatile unsigned write;     /**< Next position to be written*/
+	volatile unsigned read;      /**< Next position to be read */
+	unsigned len;                /**< Circular buffer length */
+	unsigned elem_size;          /**< Pointer size - for 32/64 bit OS */
+	void * volatile buffer[0];   /**< The buffer contains mbuf pointers */
+};
+
+/*
+ * The kernel image of the rte_mbuf struct, with only the relevant fields.
+ * Padding is necessary to assure the offsets of these fields
+ */
+struct rte_kni_mbuf {
+	void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
+	char pad0[10];
+	uint16_t data_off;      /**< Start address of data in segment buffer. */
+	char pad1[4];
+	uint64_t ol_flags;      /**< Offload features. */
+	char pad2[4];
+	uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
+	uint16_t data_len;      /**< Amount of data in segment buffer. */
+
+	/* fields on second cache line */
+	char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+	void *pool;
+	void *next;
+};
+
+/*
+ * Struct used to create a KNI device. Passed to the kernel in IOCTL call
+ */
+
+struct rte_kni_device_info {
+	char name[RTE_KNI_NAMESIZE];  /**< Network device name for KNI */
+
+	phys_addr_t tx_phys;
+	phys_addr_t rx_phys;
+	phys_addr_t alloc_phys;
+	phys_addr_t free_phys;
+
+	/* Used by Ethtool */
+	phys_addr_t req_phys;
+	phys_addr_t resp_phys;
+	phys_addr_t sync_phys;
+	void * sync_va;
+
+	/* mbuf mempool */
+	void * mbuf_va;
+	phys_addr_t mbuf_phys;
+
+	/* PCI info */
+	uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
+	uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
+	uint8_t bus;                  /**< Device bus */
+	uint8_t devid;                /**< Device ID */
+	uint8_t function;             /**< Device function. */
+
+	uint16_t group_id;            /**< Group ID */
+	uint32_t core_id;             /**< core ID to bind for kernel thread */
+
+	uint8_t force_bind : 1;       /**< Flag for kernel thread binding */
+
+	/* mbuf size */
+	unsigned mbuf_size;
+};
+
+#define KNI_DEVICE "kni"
+
+#define RTE_KNI_IOCTL_TEST    _IOWR(0, 1, int)
+#define RTE_KNI_IOCTL_CREATE  _IOWR(0, 2, struct rte_kni_device_info)
+#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info)
+
+#endif /* _RTE_KNI_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
new file mode 100644
index 00000000..12503efa
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -0,0 +1,156 @@
+DPDK_2.0 {
+	global:
+
+	__rte_panic;
+	devargs_list;
+	eal_parse_sysfs_value;
+	eal_timer_source;
+	lcore_config;
+	pci_device_list;
+	pci_driver_list;
+	per_lcore__lcore_id;
+	per_lcore__rte_errno;
+	rte_calloc;
+	rte_calloc_socket;
+	rte_cpu_check_supported;
+	rte_cpu_get_flag_enabled;
+	rte_cycles_vmware_tsc_map;
+	rte_delay_us;
+	rte_dump_physmem_layout;
+	rte_dump_registers;
+	rte_dump_stack;
+	rte_dump_tailq;
+	rte_eal_alarm_cancel;
+	rte_eal_alarm_set;
+	rte_eal_dev_init;
+	rte_eal_devargs_add;
+	rte_eal_devargs_dump;
+	rte_eal_devargs_type_count;
+	rte_eal_driver_register;
+	rte_eal_driver_unregister;
+	rte_eal_get_configuration;
+	rte_eal_get_lcore_state;
+	rte_eal_get_physmem_layout;
+	rte_eal_get_physmem_size;
+	rte_eal_has_hugepages;
+	rte_eal_hpet_init;
+	rte_eal_init;
+	rte_eal_iopl_init;
+	rte_eal_lcore_role;
+	rte_eal_mp_remote_launch;
+	rte_eal_mp_wait_lcore;
+	rte_eal_parse_devargs_str;
+	rte_eal_pci_dump;
+	rte_eal_pci_probe;
+	rte_eal_pci_probe_one;
+	rte_eal_pci_register;
+	rte_eal_pci_scan;
+	rte_eal_pci_unregister;
+	rte_eal_process_type;
+	rte_eal_remote_launch;
+	rte_eal_tailq_lookup;
+	rte_eal_tailq_register;
+	rte_eal_vdev_init;
+	rte_eal_vdev_uninit;
+	rte_eal_wait_lcore;
+	rte_exit;
+	rte_free;
+	rte_get_hpet_cycles;
+	rte_get_hpet_hz;
+	rte_get_log_level;
+	rte_get_log_type;
+	rte_get_tsc_hz;
+	rte_hexdump;
+	rte_intr_callback_register;
+	rte_intr_callback_unregister;
+	rte_intr_disable;
+	rte_intr_enable;
+	rte_log;
+	rte_log_add_in_history;
+	rte_log_cur_msg_loglevel;
+	rte_log_cur_msg_logtype;
+	rte_log_dump_history;
+	rte_log_set_history;
+	rte_logs;
+	rte_malloc;
+	rte_malloc_dump_stats;
+	rte_malloc_get_socket_stats;
+	rte_malloc_set_limit;
+	rte_malloc_socket;
+	rte_malloc_validate;
+	rte_malloc_virt2phy;
+	rte_mem_lock_page;
+	rte_mem_phy2mch;
+	rte_mem_virt2phy;
+	rte_memdump;
+	rte_memory_get_nchannel;
+	rte_memory_get_nrank;
+	rte_memzone_dump;
+	rte_memzone_lookup;
+	rte_memzone_reserve;
+	rte_memzone_reserve_aligned;
+	rte_memzone_reserve_bounded;
+	rte_memzone_walk;
+	rte_openlog_stream;
+	rte_realloc;
+	rte_set_application_usage_hook;
+	rte_set_log_level;
+	rte_set_log_type;
+	rte_socket_id;
+	rte_strerror;
+	rte_strsplit;
+	rte_sys_gettid;
+	rte_thread_get_affinity;
+	rte_thread_set_affinity;
+	rte_vlog;
+	rte_xen_dom0_memory_attach;
+	rte_xen_dom0_memory_init;
+	rte_zmalloc;
+	rte_zmalloc_socket;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_eal_pci_detach;
+	rte_eal_pci_read_config;
+	rte_eal_pci_write_config;
+	rte_epoll_ctl;
+	rte_epoll_wait;
+	rte_intr_allow_others;
+	rte_intr_dp_is_en;
+	rte_intr_efd_disable;
+	rte_intr_efd_enable;
+	rte_intr_rx_ctl;
+	rte_intr_tls_epfd;
+	rte_memzone_free;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+	global:
+
+	rte_intr_cap_multiple;
+	rte_keepalive_create;
+	rte_keepalive_dispatch_pings;
+	rte_keepalive_mark_alive;
+	rte_keepalive_register_core;
+	rte_xen_dom0_supported;
+
+} DPDK_2.1;
+
+DPDK_16.04 {
+	global:
+
+	rte_cpu_get_flag_name;
+	rte_eal_pci_ioport_map;
+	rte_eal_pci_ioport_read;
+	rte_eal_pci_ioport_unmap;
+	rte_eal_pci_ioport_write;
+	rte_eal_pci_map_device;
+	rte_eal_pci_unmap_device;
+	rte_eal_primary_proc_alive;
+
+} DPDK_2.2;
diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile
new file mode 100644
index 00000000..ec6e702f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/Makefile
@@ -0,0 +1,53 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = igb_uio
+MODULE_PATH = drivers/net/igb_uio
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -Winline -Wall -Werror
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := igb_uio.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
new file mode 100644
index 00000000..c1d45a66
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -0,0 +1,116 @@
+/*
+ * Minimal wrappers to allow compiling igb_uio on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+#define pci_cfg_access_lock   pci_block_user_cfg_access
+#define pci_cfg_access_unlock pci_unblock_user_cfg_access
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+#define HAVE_PTE_MASK_PAGE_IOMAP
+#endif
+
+#ifndef PCI_MSIX_ENTRY_SIZE
+#define PCI_MSIX_ENTRY_SIZE             16
+#define  PCI_MSIX_ENTRY_LOWER_ADDR      0
+#define  PCI_MSIX_ENTRY_UPPER_ADDR      4
+#define  PCI_MSIX_ENTRY_DATA            8
+#define  PCI_MSIX_ENTRY_VECTOR_CTRL     12
+#define   PCI_MSIX_ENTRY_CTRL_MASKBIT   1
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	 RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9)))
+
+static int pci_num_vf(struct pci_dev *dev)
+{
+	struct iov {
+		int pos;
+		int nres;
+		u32 cap;
+		u16 ctrl;
+		u16 total;
+		u16 initial;
+		u16 nr_virtfn;
+	} *iov = (struct iov *)dev->sriov;
+
+	if (!dev->is_physfn)
+		return 0;
+
+	return iov->nr_virtfn;
+}
+
+#endif /* < 2.6.34 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	   RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	   RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3)))
+
+/* Check if INTX works to control irq's.
+ * Set's INTX_DISABLE flag and reads it back
+ */
+static bool pci_intx_mask_supported(struct pci_dev *pdev)
+{
+	bool mask_supported = false;
+	uint16_t orig, new;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_word(pdev, PCI_COMMAND, &orig);
+	pci_write_config_word(pdev, PCI_COMMAND,
+			      orig ^ PCI_COMMAND_INTX_DISABLE);
+	pci_read_config_word(pdev, PCI_COMMAND, &new);
+
+	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+		dev_err(&pdev->dev, "Command register changed from "
+			"0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+	} else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
+		mask_supported = true;
+		pci_write_config_word(pdev, PCI_COMMAND, orig);
+	}
+	pci_unblock_user_cfg_access(pdev);
+
+	return mask_supported;
+}
+
+static bool pci_check_and_mask_intx(struct pci_dev *pdev)
+{
+	bool pending;
+	uint32_t status;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_dword(pdev, PCI_COMMAND, &status);
+
+	/* interrupt is not ours, goes to out */
+	pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0);
+	if (pending) {
+		uint16_t old, new;
+
+		old = status;
+		if (status != 0)
+			new = old & (~PCI_COMMAND_INTX_DISABLE);
+		else
+			new = old | PCI_COMMAND_INTX_DISABLE;
+
+		if (old != new)
+			pci_write_config_word(pdev, PCI_COMMAND, new);
+	}
+	pci_unblock_user_cfg_access(pdev);
+
+	return pending;
+}
+
+#endif /* < 3.3.0 */
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
new file mode 100644
index 00000000..72b26923
--- /dev/null
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -0,0 +1,580 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/version.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_XEN_DOM0
+#include <xen/xen.h>
+#endif
+#include <rte_pci_dev_features.h>
+
+#include "compat.h"
+
+/**
+ * A structure describing the private information for a uio device.
+ */
+struct rte_uio_pci_dev {
+	struct uio_info info;
+	struct pci_dev *pdev;
+	enum rte_intr_mode mode;
+};
+
+static char *intr_mode;
+static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
+
+/* sriov sysfs */
+static ssize_t
+show_max_vfs(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	return snprintf(buf, 10, "%u\n", dev_num_vf(dev));
+}
+
+static ssize_t
+store_max_vfs(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	int err = 0;
+	unsigned long max_vfs;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (0 != kstrtoul(buf, 0, &max_vfs))
+		return -EINVAL;
+
+	if (0 == max_vfs)
+		pci_disable_sriov(pdev);
+	else if (0 == pci_num_vf(pdev))
+		err = pci_enable_sriov(pdev, max_vfs);
+	else /* do nothing if change max_vfs number */
+		err = -EINVAL;
+
+	return err ? err : count;
+}
+
+#ifdef RTE_PCI_CONFIG
+static ssize_t
+show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	dev_info(dev, "Deprecated\n");
+
+	return 0;
+}
+
+static ssize_t
+store_extended_tag(struct device *dev,
+		   struct device_attribute *attr,
+		   const char *buf,
+		   size_t count)
+{
+	dev_info(dev, "Deprecated\n");
+
+	return 0;
+}
+
+static ssize_t
+show_max_read_request_size(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	dev_info(dev, "Deprecated\n");
+
+	return 0;
+}
+
+static ssize_t
+store_max_read_request_size(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf,
+			    size_t count)
+{
+	dev_info(dev, "Deprecated\n");
+
+	return 0;
+}
+#endif
+
+static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs);
+#ifdef RTE_PCI_CONFIG
+static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag,
+	store_extended_tag);
+static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR,
+	show_max_read_request_size, store_max_read_request_size);
+#endif
+
+static struct attribute *dev_attrs[] = {
+	&dev_attr_max_vfs.attr,
+#ifdef RTE_PCI_CONFIG
+	&dev_attr_extended_tag.attr,
+	&dev_attr_max_read_request_size.attr,
+#endif
+	NULL,
+};
+
+static const struct attribute_group dev_attr_grp = {
+	.attrs = dev_attrs,
+};
+/*
+ * It masks the msix on/off of generating MSI-X messages.
+ */
+static void
+igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
+{
+	u32 mask_bits = desc->masked;
+	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+						PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+	if (state != 0)
+		mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+	else
+		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+
+	if (mask_bits != desc->masked) {
+		writel(mask_bits, desc->mask_base + offset);
+		readl(desc->mask_base);
+		desc->masked = mask_bits;
+	}
+}
+
+/**
+ * This is the irqcontrol callback to be registered to uio_info.
+ * It can be used to disable/enable interrupt from user space processes.
+ *
+ * @param info
+ *  pointer to uio_info.
+ * @param irq_state
+ *  state value. 1 to enable interrupt, 0 to disable interrupt.
+ *
+ * @return
+ *  - On success, 0.
+ *  - On failure, a negative value.
+ */
+static int
+igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
+{
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *pdev = udev->pdev;
+
+	pci_cfg_access_lock(pdev);
+	if (udev->mode == RTE_INTR_MODE_LEGACY)
+		pci_intx(pdev, !!irq_state);
+
+	else if (udev->mode == RTE_INTR_MODE_MSIX) {
+		struct msi_desc *desc;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
+		list_for_each_entry(desc, &pdev->msi_list, list)
+			igbuio_msix_mask_irq(desc, irq_state);
+#else
+		list_for_each_entry(desc, &pdev->dev.msi_list, list)
+			igbuio_msix_mask_irq(desc, irq_state);
+#endif
+	}
+	pci_cfg_access_unlock(pdev);
+
+	return 0;
+}
+
+/**
+ * This is interrupt handler which will check if the interrupt is for the right device.
+ * If yes, disable it here and will be enable later.
+ */
+static irqreturn_t
+igbuio_pci_irqhandler(int irq, struct uio_info *info)
+{
+	struct rte_uio_pci_dev *udev = info->priv;
+
+	/* Legacy mode need to mask in hardware */
+	if (udev->mode == RTE_INTR_MODE_LEGACY &&
+	    !pci_check_and_mask_intx(udev->pdev))
+		return IRQ_NONE;
+
+	/* Message signal mode, no share IRQ and automasked */
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_XEN_DOM0
+static int
+igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
+{
+	int idx;
+
+	idx = (int)vma->vm_pgoff;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+#ifdef HAVE_PTE_MASK_PAGE_IOMAP
+	vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
+#endif
+
+	return remap_pfn_range(vma,
+			vma->vm_start,
+			info->mem[idx].addr >> PAGE_SHIFT,
+			vma->vm_end - vma->vm_start,
+			vma->vm_page_prot);
+}
+
+/**
+ * This is uio device mmap method which will use igbuio mmap for Xen
+ * Dom0 environment.
+ */
+static int
+igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
+{
+	int idx;
+
+	if (vma->vm_pgoff >= MAX_UIO_MAPS)
+		return -EINVAL;
+
+	if (info->mem[vma->vm_pgoff].size == 0)
+		return -EINVAL;
+
+	idx = (int)vma->vm_pgoff;
+	switch (info->mem[idx].memtype) {
+	case UIO_MEM_PHYS:
+		return igbuio_dom0_mmap_phys(info, vma);
+	case UIO_MEM_LOGICAL:
+	case UIO_MEM_VIRTUAL:
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
+/* Remap pci resources described by bar #pci_bar in uio resource n. */
+static int
+igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
+		       int n, int pci_bar, const char *name)
+{
+	unsigned long addr, len;
+	void *internal_addr;
+
+	if (n >= ARRAY_SIZE(info->mem))
+		return -EINVAL;
+
+	addr = pci_resource_start(dev, pci_bar);
+	len = pci_resource_len(dev, pci_bar);
+	if (addr == 0 || len == 0)
+		return -1;
+	internal_addr = ioremap(addr, len);
+	if (internal_addr == NULL)
+		return -1;
+	info->mem[n].name = name;
+	info->mem[n].addr = addr;
+	info->mem[n].internal_addr = internal_addr;
+	info->mem[n].size = len;
+	info->mem[n].memtype = UIO_MEM_PHYS;
+	return 0;
+}
+
+/* Get pci port io resources described by bar #pci_bar in uio resource n. */
+static int
+igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
+		int n, int pci_bar, const char *name)
+{
+	unsigned long addr, len;
+
+	if (n >= ARRAY_SIZE(info->port))
+		return -EINVAL;
+
+	addr = pci_resource_start(dev, pci_bar);
+	len = pci_resource_len(dev, pci_bar);
+	if (addr == 0 || len == 0)
+		return -EINVAL;
+
+	info->port[n].name = name;
+	info->port[n].start = addr;
+	info->port[n].size = len;
+	info->port[n].porttype = UIO_PORT_X86;
+
+	return 0;
+}
+
+/* Unmap previously ioremap'd resources */
+static void
+igbuio_pci_release_iomem(struct uio_info *info)
+{
+	int i;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++) {
+		if (info->mem[i].internal_addr)
+			iounmap(info->mem[i].internal_addr);
+	}
+}
+
+static int
+igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
+{
+	int i, iom, iop, ret;
+	unsigned long flags;
+	static const char *bar_names[PCI_STD_RESOURCE_END + 1]  = {
+		"BAR0",
+		"BAR1",
+		"BAR2",
+		"BAR3",
+		"BAR4",
+		"BAR5",
+	};
+
+	iom = 0;
+	iop = 0;
+
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+		if (pci_resource_len(dev, i) != 0 &&
+				pci_resource_start(dev, i) != 0) {
+			flags = pci_resource_flags(dev, i);
+			if (flags & IORESOURCE_MEM) {
+				ret = igbuio_pci_setup_iomem(dev, info, iom,
+							     i, bar_names[i]);
+				if (ret != 0)
+					return ret;
+				iom++;
+			} else if (flags & IORESOURCE_IO) {
+				ret = igbuio_pci_setup_ioport(dev, info, iop,
+							      i, bar_names[i]);
+				if (ret != 0)
+					return ret;
+				iop++;
+			}
+		}
+	}
+
+	return (iom != 0) ? ret : -ENOENT;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+static int __devinit
+#else
+static int
+#endif
+igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct rte_uio_pci_dev *udev;
+	struct msix_entry msix_entry;
+	int err;
+
+	udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
+
+	/*
+	 * enable device: ask low-level code to enable I/O and
+	 * memory
+	 */
+	err = pci_enable_device(dev);
+	if (err != 0) {
+		dev_err(&dev->dev, "Cannot enable PCI device\n");
+		goto fail_free;
+	}
+
+	/*
+	 * reserve device's PCI memory regions for use by this
+	 * module
+	 */
+	err = pci_request_regions(dev, "igb_uio");
+	if (err != 0) {
+		dev_err(&dev->dev, "Cannot request regions\n");
+		goto fail_disable;
+	}
+
+	/* enable bus mastering on the device */
+	pci_set_master(dev);
+
+	/* remap IO memory */
+	err = igbuio_setup_bars(dev, &udev->info);
+	if (err != 0)
+		goto fail_release_iomem;
+
+	/* set 64-bit DMA mask */
+	err = pci_set_dma_mask(dev,  DMA_BIT_MASK(64));
+	if (err != 0) {
+		dev_err(&dev->dev, "Cannot set DMA mask\n");
+		goto fail_release_iomem;
+	}
+
+	err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
+	if (err != 0) {
+		dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
+		goto fail_release_iomem;
+	}
+
+	/* fill uio infos */
+	udev->info.name = "igb_uio";
+	udev->info.version = "0.1";
+	udev->info.handler = igbuio_pci_irqhandler;
+	udev->info.irqcontrol = igbuio_pci_irqcontrol;
+#ifdef CONFIG_XEN_DOM0
+	/* check if the driver run on Xen Dom0 */
+	if (xen_initial_domain())
+		udev->info.mmap = igbuio_dom0_pci_mmap;
+#endif
+	udev->info.priv = udev;
+	udev->pdev = dev;
+
+	switch (igbuio_intr_mode_preferred) {
+	case RTE_INTR_MODE_MSIX:
+		/* Only 1 msi-x vector needed */
+		msix_entry.entry = 0;
+		if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
+			dev_dbg(&dev->dev, "using MSI-X");
+			udev->info.irq = msix_entry.vector;
+			udev->mode = RTE_INTR_MODE_MSIX;
+			break;
+		}
+		/* fall back to INTX */
+	case RTE_INTR_MODE_LEGACY:
+		if (pci_intx_mask_supported(dev)) {
+			dev_dbg(&dev->dev, "using INTX");
+			udev->info.irq_flags = IRQF_SHARED;
+			udev->info.irq = dev->irq;
+			udev->mode = RTE_INTR_MODE_LEGACY;
+			break;
+		}
+		dev_notice(&dev->dev, "PCI INTX mask not supported\n");
+		/* fall back to no IRQ */
+	case RTE_INTR_MODE_NONE:
+		udev->mode = RTE_INTR_MODE_NONE;
+		udev->info.irq = 0;
+		break;
+
+	default:
+		dev_err(&dev->dev, "invalid IRQ mode %u",
+			igbuio_intr_mode_preferred);
+		err = -EINVAL;
+		goto fail_release_iomem;
+	}
+
+	err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
+	if (err != 0)
+		goto fail_release_iomem;
+
+	/* register uio driver */
+	err = uio_register_device(&dev->dev, &udev->info);
+	if (err != 0)
+		goto fail_remove_group;
+
+	pci_set_drvdata(dev, udev);
+
+	dev_info(&dev->dev, "uio device registered with irq %lx\n",
+		 udev->info.irq);
+
+	return 0;
+
+fail_remove_group:
+	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+fail_release_iomem:
+	igbuio_pci_release_iomem(&udev->info);
+	if (udev->mode == RTE_INTR_MODE_MSIX)
+		pci_disable_msix(udev->pdev);
+	pci_release_regions(dev);
+fail_disable:
+	pci_disable_device(dev);
+fail_free:
+	kfree(udev);
+
+	return err;
+}
+
+static void
+igbuio_pci_remove(struct pci_dev *dev)
+{
+	struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
+
+	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+	uio_unregister_device(&udev->info);
+	igbuio_pci_release_iomem(&udev->info);
+	if (udev->mode == RTE_INTR_MODE_MSIX)
+		pci_disable_msix(dev);
+	pci_release_regions(dev);
+	pci_disable_device(dev);
+	pci_set_drvdata(dev, NULL);
+	kfree(udev);
+}
+
+static int
+igbuio_config_intr_mode(char *intr_str)
+{
+	if (!intr_str) {
+		pr_info("Use MSIX interrupt by default\n");
+		return 0;
+	}
+
+	if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) {
+		igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
+		pr_info("Use MSIX interrupt\n");
+	} else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) {
+		igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY;
+		pr_info("Use legacy interrupt\n");
+	} else {
+		pr_info("Error: bad parameter - %s\n", intr_str);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct pci_driver igbuio_pci_driver = {
+	.name = "igb_uio",
+	.id_table = NULL,
+	.probe = igbuio_pci_probe,
+	.remove = igbuio_pci_remove,
+};
+
+static int __init
+igbuio_pci_init_module(void)
+{
+	int ret;
+
+	ret = igbuio_config_intr_mode(intr_mode);
+	if (ret < 0)
+		return ret;
+
+	return pci_register_driver(&igbuio_pci_driver);
+}
+
+static void __exit
+igbuio_pci_exit_module(void)
+{
+	pci_unregister_driver(&igbuio_pci_driver);
+}
+
+module_init(igbuio_pci_init_module);
+module_exit(igbuio_pci_exit_module);
+
+module_param(intr_mode, charp, S_IRUGO);
+MODULE_PARM_DESC(intr_mode,
+"igb_uio interrupt mode (default=msix):\n"
+"    " RTE_INTR_MODE_MSIX_NAME "       Use MSIX interrupt\n"
+"    " RTE_INTR_MODE_LEGACY_NAME "     Use Legacy interrupt\n"
+"\n");
+
+MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
new file mode 100644
index 00000000..ac99d3f1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -0,0 +1,93 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_kni
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/ethtool/igb
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu)
+MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .)
+UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
+	 | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1)
+MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
+endif
+
+# this lib needs main eal
+DEPDIRS-y += lib/librte_eal/linuxapp/eal
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y := ethtool/ixgbe/ixgbe_main.c
+SRCS-y += ethtool/ixgbe/ixgbe_api.c
+SRCS-y += ethtool/ixgbe/ixgbe_common.c
+SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c
+SRCS-y += ethtool/ixgbe/ixgbe_82599.c
+SRCS-y += ethtool/ixgbe/ixgbe_82598.c
+SRCS-y += ethtool/ixgbe/ixgbe_x540.c
+SRCS-y += ethtool/ixgbe/ixgbe_phy.c
+SRCS-y += ethtool/ixgbe/kcompat.c
+
+SRCS-y += ethtool/igb/e1000_82575.c
+SRCS-y += ethtool/igb/e1000_i210.c
+SRCS-y += ethtool/igb/e1000_api.c
+SRCS-y += ethtool/igb/e1000_mac.c
+SRCS-y += ethtool/igb/e1000_manage.c
+SRCS-y += ethtool/igb/e1000_mbx.c
+SRCS-y += ethtool/igb/e1000_nvm.c
+SRCS-y += ethtool/igb/e1000_phy.c
+SRCS-y += ethtool/igb/igb_ethtool.c
+SRCS-y += ethtool/igb/igb_hwmon.c
+SRCS-y += ethtool/igb/igb_main.c
+SRCS-y += ethtool/igb/igb_debugfs.c
+SRCS-y += ethtool/igb/igb_param.c
+SRCS-y += ethtool/igb/igb_procfs.c
+SRCS-y += ethtool/igb/igb_vmdq.c
+#SRCS-y += ethtool/igb/igb_ptp.c
+#SRCS-y += ethtool/igb/kcompat.c
+
+SRCS-y += kni_misc.c
+SRCS-y += kni_net.c
+SRCS-y += kni_ethtool.c
+SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
new file mode 100644
index 00000000..cf100b67
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -0,0 +1,29 @@
+/*
+ * Minimal wrappers to allow compiling kni on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	   RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
+
+#define sk_sleep(s) (s)->sk_sleep
+
+#endif /* < 2.6.35 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
+#define HAVE_IOV_ITER_MSGHDR
+#endif
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) )
+#define HAVE_KIOCB_MSG_PARAM
+#endif /* < 4.1.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/README b/lib/librte_eal/linuxapp/kni/ethtool/README
new file mode 100644
index 00000000..2cfefe72
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/README
@@ -0,0 +1,100 @@
+..
+     BSD LICENSE
+   
+     Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+     All rights reserved.
+   
+     Redistribution and use in source and binary forms, with or without
+     modification, are permitted provided that the following conditions
+     are met:
+   
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in
+         the documentation and/or other materials provided with the
+         distribution.
+       * Neither the name of Intel Corporation nor the names of its
+         contributors may be used to endorse or promote products derived
+         from this software without specific prior written permission.
+   
+     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Description
+
+In order to support ethtool in Kernel NIC Interface, the standard Linux kernel
+drivers of ixgbe/igb are needed to be reused here. ixgbe-3.9.17 is the version
+modified from in kernel NIC interface kernel module to support ixgbe NIC, and
+igb-3.4.8 is the version modified from in kernel NIC interface kernel module to
+support igb NIC.
+
+The source code package of ixgbe can be downloaded from sourceforge.net as below.
+http://sourceforge.net/projects/e1000/files/ixgbe%20stable/
+Below source files are copied or modified from ixgbe.
+
+ixgbe_82598.h
+ixgbe_82599.c
+ixgbe_82599.h
+ixgbe_api.c
+ixgbe_api.h
+ixgbe_common.c
+ixgbe_common.h
+ixgbe_dcb.h
+ixgbe_ethtool.c
+ixgbe_fcoe.h
+ixgbe.h
+ixgbe_main.c
+ixgbe_mbx.h
+ixgbe_osdep.h
+ixgbe_phy.c
+ixgbe_phy.h
+ixgbe_sriov.h
+ixgbe_type.h
+kcompat.c
+kcompat.h
+
+The source code package of igb can be downloaded from sourceforge.net as below.
+http://sourceforge.net/projects/e1000/files/igb%20stable/
+Below source files are copied or modified from igb.
+
+e1000_82575.c
+e1000_82575.h
+e1000_api.c
+e1000_api.h
+e1000_defines.h
+e1000_hw.h
+e1000_mac.c
+e1000_mac.h
+e1000_manage.c
+e1000_manage.h
+e1000_mbx.c
+e1000_mbx.h
+e1000_nvm.c
+e1000_nvm.h
+e1000_osdep.h
+e1000_phy.c
+e1000_phy.h
+e1000_regs.h
+igb_ethtool.c
+igb.h
+igb_main.c
+igb_param.c
+igb_procfs.c
+igb_regtest.h
+igb_sysfs.c
+igb_vmdq.c
+igb_vmdq.h
+kcompat.c
+kcompat_ethtool.c
+kcompat.h
+
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
new file mode 100644
index 00000000..5f297e5b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
@@ -0,0 +1,339 @@
+
+"This software program is licensed subject to the GNU General Public License 
+(GPL). Version 2, June 1991, available at 
+<http://www.fsf.org/copyleft/gpl.html>"
+
+GNU General Public License 
+
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.  
+59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to 
+share and change it. By contrast, the GNU General Public License is intended
+to guarantee your freedom to share and change free software--to make sure 
+the software is free for all its users. This General Public License applies 
+to most of the Free Software Foundation's software and to any other program 
+whose authors commit to using it. (Some other Free Software Foundation 
+software is covered by the GNU Library General Public License instead.) You 
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom 
+to distribute copies of free software (and charge for this service if you 
+wish), that you receive source code or can get it if you want it, that you 
+can change the software or use pieces of it in new free programs; and that 
+you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to 
+deny you these rights or to ask you to surrender the rights. These 
+restrictions translate to certain responsibilities for you if you distribute
+copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or 
+for a fee, you must give the recipients all the rights that you have. You 
+must make sure that they, too, receive or can get the source code. And you 
+must show them these terms so they know their rights.
+ 
+We protect your rights with two steps: (1) copyright the software, and (2) 
+offer you this license which gives you legal permission to copy, distribute 
+and/or modify the software. 
+
+Also, for each author's protection and ours, we want to make certain that 
+everyone understands that there is no warranty for this free software. If 
+the software is modified by someone else and passed on, we want its 
+recipients to know that what they have is not the original, so that any 
+problems introduced by others will not reflect on the original authors' 
+reputations. 
+
+Finally, any free program is threatened constantly by software patents. We 
+wish to avoid the danger that redistributors of a free program will 
+individually obtain patent licenses, in effect making the program 
+proprietary. To prevent this, we have made it clear that any patent must be 
+licensed for everyone's free use or not licensed at all. 
+
+The precise terms and conditions for copying, distribution and modification 
+follow. 
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+   placed by the copyright holder saying it may be distributed under the 
+   terms of this General Public License. The "Program", below, refers to any
+   such program or work, and a "work based on the Program" means either the 
+   Program or any derivative work under copyright law: that is to say, a 
+   work containing the Program or a portion of it, either verbatim or with 
+   modifications and/or translated into another language. (Hereinafter, 
+   translation is included without limitation in the term "modification".) 
+   Each licensee is addressed as "you". 
+
+   Activities other than copying, distribution and modification are not 
+   covered by this License; they are outside its scope. The act of running 
+   the Program is not restricted, and the output from the Program is covered 
+   only if its contents constitute a work based on the Program (independent 
+   of having been made by running the Program). Whether that is true depends
+   on what the Program does. 
+
+1. You may copy and distribute verbatim copies of the Program's source code 
+   as you receive it, in any medium, provided that you conspicuously and 
+   appropriately publish on each copy an appropriate copyright notice and 
+   disclaimer of warranty; keep intact all the notices that refer to this 
+   License and to the absence of any warranty; and give any other recipients 
+   of the Program a copy of this License along with the Program. 
+
+   You may charge a fee for the physical act of transferring a copy, and you 
+   may at your option offer warranty protection in exchange for a fee. 
+
+2. You may modify your copy or copies of the Program or any portion of it, 
+   thus forming a work based on the Program, and copy and distribute such 
+   modifications or work under the terms of Section 1 above, provided that 
+   you also meet all of these conditions: 
+
+   * a) You must cause the modified files to carry prominent notices stating 
+        that you changed the files and the date of any change. 
+
+   * b) You must cause any work that you distribute or publish, that in 
+        whole or in part contains or is derived from the Program or any part 
+        thereof, to be licensed as a whole at no charge to all third parties
+        under the terms of this License. 
+
+   * c) If the modified program normally reads commands interactively when 
+        run, you must cause it, when started running for such interactive 
+        use in the most ordinary way, to print or display an announcement 
+        including an appropriate copyright notice and a notice that there is
+        no warranty (or else, saying that you provide a warranty) and that 
+        users may redistribute the program under these conditions, and 
+        telling the user how to view a copy of this License. (Exception: if 
+        the Program itself is interactive but does not normally print such 
+        an announcement, your work based on the Program is not required to 
+        print an announcement.) 
+
+   These requirements apply to the modified work as a whole. If identifiable 
+   sections of that work are not derived from the Program, and can be 
+   reasonably considered independent and separate works in themselves, then 
+   this License, and its terms, do not apply to those sections when you 
+   distribute them as separate works. But when you distribute the same 
+   sections as part of a whole which is a work based on the Program, the 
+   distribution of the whole must be on the terms of this License, whose 
+   permissions for other licensees extend to the entire whole, and thus to 
+   each and every part regardless of who wrote it. 
+
+   Thus, it is not the intent of this section to claim rights or contest 
+   your rights to work written entirely by you; rather, the intent is to 
+   exercise the right to control the distribution of derivative or 
+   collective works based on the Program. 
+
+   In addition, mere aggregation of another work not based on the Program 
+   with the Program (or with a work based on the Program) on a volume of a 
+   storage or distribution medium does not bring the other work under the 
+   scope of this License. 
+
+3. You may copy and distribute the Program (or a work based on it, under 
+   Section 2) in object code or executable form under the terms of Sections 
+   1 and 2 above provided that you also do one of the following: 
+
+   * a) Accompany it with the complete corresponding machine-readable source 
+        code, which must be distributed under the terms of Sections 1 and 2 
+        above on a medium customarily used for software interchange; or, 
+
+   * b) Accompany it with a written offer, valid for at least three years, 
+        to give any third party, for a charge no more than your cost of 
+        physically performing source distribution, a complete machine-
+        readable copy of the corresponding source code, to be distributed 
+        under the terms of Sections 1 and 2 above on a medium customarily 
+        used for software interchange; or, 
+
+   * c) Accompany it with the information you received as to the offer to 
+        distribute corresponding source code. (This alternative is allowed 
+        only for noncommercial distribution and only if you received the 
+        program in object code or executable form with such an offer, in 
+        accord with Subsection b above.) 
+
+   The source code for a work means the preferred form of the work for 
+   making modifications to it. For an executable work, complete source code 
+   means all the source code for all modules it contains, plus any 
+   associated interface definition files, plus the scripts used to control 
+   compilation and installation of the executable. However, as a special 
+   exception, the source code distributed need not include anything that is 
+   normally distributed (in either source or binary form) with the major 
+   components (compiler, kernel, and so on) of the operating system on which
+   the executable runs, unless that component itself accompanies the 
+   executable. 
+
+   If distribution of executable or object code is made by offering access 
+   to copy from a designated place, then offering equivalent access to copy 
+   the source code from the same place counts as distribution of the source 
+   code, even though third parties are not compelled to copy the source 
+   along with the object code. 
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+   expressly provided under this License. Any attempt otherwise to copy, 
+   modify, sublicense or distribute the Program is void, and will 
+   automatically terminate your rights under this License. However, parties 
+   who have received copies, or rights, from you under this License will not
+   have their licenses terminated so long as such parties remain in full 
+   compliance. 
+
+5. You are not required to accept this License, since you have not signed 
+   it. However, nothing else grants you permission to modify or distribute 
+   the Program or its derivative works. These actions are prohibited by law 
+   if you do not accept this License. Therefore, by modifying or 
+   distributing the Program (or any work based on the Program), you 
+   indicate your acceptance of this License to do so, and all its terms and
+   conditions for copying, distributing or modifying the Program or works 
+   based on it. 
+
+6. Each time you redistribute the Program (or any work based on the 
+   Program), the recipient automatically receives a license from the 
+   original licensor to copy, distribute or modify the Program subject to 
+   these terms and conditions. You may not impose any further restrictions 
+   on the recipients' exercise of the rights granted herein. You are not 
+   responsible for enforcing compliance by third parties to this License. 
+
+7. If, as a consequence of a court judgment or allegation of patent 
+   infringement or for any other reason (not limited to patent issues), 
+   conditions are imposed on you (whether by court order, agreement or 
+   otherwise) that contradict the conditions of this License, they do not 
+   excuse you from the conditions of this License. If you cannot distribute 
+   so as to satisfy simultaneously your obligations under this License and 
+   any other pertinent obligations, then as a consequence you may not 
+   distribute the Program at all. For example, if a patent license would 
+   not permit royalty-free redistribution of the Program by all those who 
+   receive copies directly or indirectly through you, then the only way you 
+   could satisfy both it and this License would be to refrain entirely from 
+   distribution of the Program. 
+
+   If any portion of this section is held invalid or unenforceable under any
+   particular circumstance, the balance of the section is intended to apply
+   and the section as a whole is intended to apply in other circumstances. 
+
+   It is not the purpose of this section to induce you to infringe any 
+   patents or other property right claims or to contest validity of any 
+   such claims; this section has the sole purpose of protecting the 
+   integrity of the free software distribution system, which is implemented 
+   by public license practices. Many people have made generous contributions
+   to the wide range of software distributed through that system in 
+   reliance on consistent application of that system; it is up to the 
+   author/donor to decide if he or she is willing to distribute software 
+   through any other system and a licensee cannot impose that choice. 
+
+   This section is intended to make thoroughly clear what is believed to be 
+   a consequence of the rest of this License. 
+
+8. If the distribution and/or use of the Program is restricted in certain 
+   countries either by patents or by copyrighted interfaces, the original 
+   copyright holder who places the Program under this License may add an 
+   explicit geographical distribution limitation excluding those countries, 
+   so that distribution is permitted only in or among countries not thus 
+   excluded. In such case, this License incorporates the limitation as if 
+   written in the body of this License. 
+
+9. The Free Software Foundation may publish revised and/or new versions of 
+   the General Public License from time to time. Such new versions will be 
+   similar in spirit to the present version, but may differ in detail to 
+   address new problems or concerns. 
+
+   Each version is given a distinguishing version number. If the Program 
+   specifies a version number of this License which applies to it and "any 
+   later version", you have the option of following the terms and 
+   conditions either of that version or of any later version published by 
+   the Free Software Foundation. If the Program does not specify a version 
+   number of this License, you may choose any version ever published by the 
+   Free Software Foundation. 
+
+10. If you wish to incorporate parts of the Program into other free programs
+    whose distribution conditions are different, write to the author to ask 
+    for permission. For software which is copyrighted by the Free Software 
+    Foundation, write to the Free Software Foundation; we sometimes make 
+    exceptions for this. Our decision will be guided by the two goals of 
+    preserving the free status of all derivatives of our free software and 
+    of promoting the sharing and reuse of software generally. 
+
+   NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 
+    FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 
+    OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 
+    PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER 
+    EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 
+    ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH 
+    YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL 
+    NECESSARY SERVICING, REPAIR OR CORRECTION. 
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 
+    WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 
+    REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR 
+    DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL 
+    DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM 
+    (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED 
+    INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF 
+    THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR 
+    OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest 
+possible use to the public, the best way to achieve this is to make it free 
+software which everyone can redistribute and change under these terms. 
+
+To do so, attach the following notices to the program. It is safest to 
+attach them to the start of each source file to most effectively convey the
+exclusion of warranty; and each file should have at least the "copyright" 
+line and a pointer to where the full notice is found. 
+
+one line to give the program's name and an idea of what it does.
+Copyright (C) yyyy  name of author
+
+This program is free software; you can redistribute it and/or modify it 
+under the terms of the GNU General Public License as published by the Free 
+Software Foundation; either version 2 of the License, or (at your option) 
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT 
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 
+Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail. 
+
+If the program is interactive, make it output a short notice like this when 
+it starts in an interactive mode: 
+
+Gnomovision version 69, Copyright (C) year name of author Gnomovision comes 
+with ABSOLUTELY NO WARRANTY; for details type 'show w'.  This is free 
+software, and you are welcome to redistribute it under certain conditions; 
+type 'show c' for details.
+
+The hypothetical commands 'show w' and 'show c' should show the appropriate 
+parts of the General Public License. Of course, the commands you use may be 
+called something other than 'show w' and 'show c'; they could even be 
+mouse-clicks or menu items--whatever suits your program. 
+
+You should also get your employer (if you work as a programmer) or your 
+school, if any, to sign a "copyright disclaimer" for the program, if 
+necessary. Here is a sample; alter the names: 
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program 
+'Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+signature of Ty Coon, 1 April 1989
+Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into 
+proprietary programs. If your program is a subroutine library, you may 
+consider it more useful to permit linking proprietary applications with the 
+library. If this is what you want to do, use the GNU Library General Public 
+License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
new file mode 100644
index 00000000..b8c9a13f
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
@@ -0,0 +1,3665 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/*
+ * 82575EB Gigabit Network Connection
+ * 82575EB Gigabit Backplane Connection
+ * 82575GB Gigabit Network Connection
+ * 82576 Gigabit Network Connection
+ * 82576 Quad Port Gigabit Mezzanine Adapter
+ * 82580 Gigabit Network Connection
+ * I350 Gigabit Network Connection
+ */
+
+#include "e1000_api.h"
+#include "e1000_i210.h"
+
+static s32  e1000_init_phy_params_82575(struct e1000_hw *hw);
+static s32  e1000_init_mac_params_82575(struct e1000_hw *hw);
+static s32  e1000_acquire_phy_82575(struct e1000_hw *hw);
+static void e1000_release_phy_82575(struct e1000_hw *hw);
+static s32  e1000_acquire_nvm_82575(struct e1000_hw *hw);
+static void e1000_release_nvm_82575(struct e1000_hw *hw);
+static s32  e1000_check_for_link_82575(struct e1000_hw *hw);
+static s32  e1000_check_for_link_media_swap(struct e1000_hw *hw);
+static s32  e1000_get_cfg_done_82575(struct e1000_hw *hw);
+static s32  e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
+					 u16 *duplex);
+static s32  e1000_init_hw_82575(struct e1000_hw *hw);
+static s32  e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw);
+static s32  e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+					   u16 *data);
+static s32  e1000_reset_hw_82575(struct e1000_hw *hw);
+static s32  e1000_reset_hw_82580(struct e1000_hw *hw);
+static s32  e1000_read_phy_reg_82580(struct e1000_hw *hw,
+				     u32 offset, u16 *data);
+static s32  e1000_write_phy_reg_82580(struct e1000_hw *hw,
+				      u32 offset, u16 data);
+static s32  e1000_set_d0_lplu_state_82580(struct e1000_hw *hw,
+					  bool active);
+static s32  e1000_set_d3_lplu_state_82580(struct e1000_hw *hw,
+					  bool active);
+static s32  e1000_set_d0_lplu_state_82575(struct e1000_hw *hw,
+					  bool active);
+static s32  e1000_setup_copper_link_82575(struct e1000_hw *hw);
+static s32  e1000_setup_serdes_link_82575(struct e1000_hw *hw);
+static s32  e1000_get_media_type_82575(struct e1000_hw *hw);
+static s32  e1000_set_sfp_media_type_82575(struct e1000_hw *hw);
+static s32  e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data);
+static s32  e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw,
+					    u32 offset, u16 data);
+static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw);
+static s32  e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
+static s32  e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
+						 u16 *speed, u16 *duplex);
+static s32  e1000_get_phy_id_82575(struct e1000_hw *hw);
+static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
+static bool e1000_sgmii_active_82575(struct e1000_hw *hw);
+static s32  e1000_reset_init_script_82575(struct e1000_hw *hw);
+static s32  e1000_read_mac_addr_82575(struct e1000_hw *hw);
+static void e1000_config_collision_dist_82575(struct e1000_hw *hw);
+static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw);
+static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw);
+static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw);
+static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw);
+static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw);
+static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw,
+						 u16 offset);
+static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
+						   u16 offset);
+static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw);
+static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw);
+static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value);
+static void e1000_clear_vfta_i350(struct e1000_hw *hw);
+
+static void e1000_i2c_start(struct e1000_hw *hw);
+static void e1000_i2c_stop(struct e1000_hw *hw);
+static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data);
+static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data);
+static s32 e1000_get_i2c_ack(struct e1000_hw *hw);
+static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data);
+static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data);
+static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl);
+static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl);
+static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data);
+static bool e1000_get_i2c_data(u32 *i2cctl);
+
+static const u16 e1000_82580_rxpbs_table[] = {
+	36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 };
+#define E1000_82580_RXPBS_TABLE_SIZE \
+	(sizeof(e1000_82580_rxpbs_table)/sizeof(u16))
+
+
+/**
+ *  e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO
+ *  @hw: pointer to the HW structure
+ *
+ *  Called to determine if the I2C pins are being used for I2C or as an
+ *  external MDIO interface since the two options are mutually exclusive.
+ **/
+static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw)
+{
+	u32 reg = 0;
+	bool ext_mdio = false;
+
+	DEBUGFUNC("e1000_sgmii_uses_mdio_82575");
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+	case e1000_82576:
+		reg = E1000_READ_REG(hw, E1000_MDIC);
+		ext_mdio = !!(reg & E1000_MDIC_DEST);
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		reg = E1000_READ_REG(hw, E1000_MDICNFG);
+		ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO);
+		break;
+	default:
+		break;
+	}
+	return ext_mdio;
+}
+
+/**
+ *  e1000_init_phy_params_82575 - Init PHY func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_phy_params_82575(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u32 ctrl_ext;
+
+	DEBUGFUNC("e1000_init_phy_params_82575");
+
+	phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic;
+	phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic;
+
+	if (hw->phy.media_type != e1000_media_type_copper) {
+		phy->type = e1000_phy_none;
+		goto out;
+	}
+
+	phy->ops.power_up   = e1000_power_up_phy_copper;
+	phy->ops.power_down = e1000_power_down_phy_copper_82575;
+
+	phy->autoneg_mask	= AUTONEG_ADVERTISE_SPEED_DEFAULT;
+	phy->reset_delay_us	= 100;
+
+	phy->ops.acquire	= e1000_acquire_phy_82575;
+	phy->ops.check_reset_block = e1000_check_reset_block_generic;
+	phy->ops.commit		= e1000_phy_sw_reset_generic;
+	phy->ops.get_cfg_done	= e1000_get_cfg_done_82575;
+	phy->ops.release	= e1000_release_phy_82575;
+
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+
+	if (e1000_sgmii_active_82575(hw)) {
+		phy->ops.reset = e1000_phy_hw_reset_sgmii_82575;
+		ctrl_ext |= E1000_CTRL_I2C_ENA;
+	} else {
+		phy->ops.reset = e1000_phy_hw_reset_generic;
+		ctrl_ext &= ~E1000_CTRL_I2C_ENA;
+	}
+
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+	e1000_reset_mdicnfg_82580(hw);
+
+	if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) {
+		phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575;
+		phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575;
+	} else {
+		switch (hw->mac.type) {
+		case e1000_82580:
+		case e1000_i350:
+		case e1000_i354:
+			phy->ops.read_reg = e1000_read_phy_reg_82580;
+			phy->ops.write_reg = e1000_write_phy_reg_82580;
+			break;
+		case e1000_i210:
+		case e1000_i211:
+			phy->ops.read_reg = e1000_read_phy_reg_gs40g;
+			phy->ops.write_reg = e1000_write_phy_reg_gs40g;
+			break;
+		default:
+			phy->ops.read_reg = e1000_read_phy_reg_igp;
+			phy->ops.write_reg = e1000_write_phy_reg_igp;
+		}
+	}
+
+	/* Set phy->phy_addr and phy->id. */
+	ret_val = e1000_get_phy_id_82575(hw);
+
+	/* Verify phy id and set remaining function pointers */
+	switch (phy->id) {
+	case M88E1543_E_PHY_ID:
+	case I347AT4_E_PHY_ID:
+	case M88E1112_E_PHY_ID:
+	case M88E1340M_E_PHY_ID:
+	case M88E1111_I_PHY_ID:
+		phy->type		= e1000_phy_m88;
+		phy->ops.check_polarity	= e1000_check_polarity_m88;
+		phy->ops.get_info	= e1000_get_phy_info_m88;
+		if (phy->id == I347AT4_E_PHY_ID ||
+		    phy->id == M88E1112_E_PHY_ID ||
+		    phy->id == M88E1340M_E_PHY_ID)
+			phy->ops.get_cable_length =
+					 e1000_get_cable_length_m88_gen2;
+		else if (phy->id == M88E1543_E_PHY_ID)
+			phy->ops.get_cable_length =
+					 e1000_get_cable_length_m88_gen2;
+		else
+			phy->ops.get_cable_length = e1000_get_cable_length_m88;
+		phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88;
+		/* Check if this PHY is confgured for media swap. */
+		if (phy->id == M88E1112_E_PHY_ID) {
+			u16 data;
+
+			ret_val = phy->ops.write_reg(hw,
+						     E1000_M88E1112_PAGE_ADDR,
+						     2);
+			if (ret_val)
+				goto out;
+
+			ret_val = phy->ops.read_reg(hw,
+						    E1000_M88E1112_MAC_CTRL_1,
+						    &data);
+			if (ret_val)
+				goto out;
+
+			data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >>
+			       E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT;
+			if (data == E1000_M88E1112_AUTO_COPPER_SGMII ||
+			    data == E1000_M88E1112_AUTO_COPPER_BASEX)
+				hw->mac.ops.check_for_link =
+						e1000_check_for_link_media_swap;
+		}
+		break;
+	case IGP03E1000_E_PHY_ID:
+	case IGP04E1000_E_PHY_ID:
+		phy->type = e1000_phy_igp_3;
+		phy->ops.check_polarity = e1000_check_polarity_igp;
+		phy->ops.get_info = e1000_get_phy_info_igp;
+		phy->ops.get_cable_length = e1000_get_cable_length_igp_2;
+		phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp;
+		phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575;
+		phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic;
+		break;
+	case I82580_I_PHY_ID:
+	case I350_I_PHY_ID:
+		phy->type = e1000_phy_82580;
+		phy->ops.check_polarity = e1000_check_polarity_82577;
+		phy->ops.force_speed_duplex =
+					 e1000_phy_force_speed_duplex_82577;
+		phy->ops.get_cable_length = e1000_get_cable_length_82577;
+		phy->ops.get_info = e1000_get_phy_info_82577;
+		phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580;
+		phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580;
+		break;
+	case I210_I_PHY_ID:
+		phy->type		= e1000_phy_i210;
+		phy->ops.check_polarity	= e1000_check_polarity_m88;
+		phy->ops.get_info	= e1000_get_phy_info_m88;
+		phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2;
+		phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580;
+		phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580;
+		phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88;
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_init_nvm_params_82575 - Init NVM func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_init_nvm_params_82575(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+	u16 size;
+
+	DEBUGFUNC("e1000_init_nvm_params_82575");
+
+	size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+		     E1000_EECD_SIZE_EX_SHIFT);
+	/*
+	 * Added to a constant, "size" becomes the left-shift value
+	 * for setting word_size.
+	 */
+	size += NVM_WORD_SIZE_BASE_SHIFT;
+
+	/* Just in case size is out of range, cap it to the largest
+	 * EEPROM size supported
+	 */
+	if (size > 15)
+		size = 15;
+
+	nvm->word_size = 1 << size;
+	if (hw->mac.type < e1000_i210) {
+		nvm->opcode_bits = 8;
+		nvm->delay_usec = 1;
+
+		switch (nvm->override) {
+		case e1000_nvm_override_spi_large:
+			nvm->page_size = 32;
+			nvm->address_bits = 16;
+			break;
+		case e1000_nvm_override_spi_small:
+			nvm->page_size = 8;
+			nvm->address_bits = 8;
+			break;
+		default:
+			nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
+			nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ?
+					    16 : 8;
+			break;
+		}
+		if (nvm->word_size == (1 << 15))
+			nvm->page_size = 128;
+
+		nvm->type = e1000_nvm_eeprom_spi;
+	} else {
+		nvm->type = e1000_nvm_flash_hw;
+	}
+
+	/* Function Pointers */
+	nvm->ops.acquire = e1000_acquire_nvm_82575;
+	nvm->ops.release = e1000_release_nvm_82575;
+	if (nvm->word_size < (1 << 15))
+		nvm->ops.read = e1000_read_nvm_eerd;
+	else
+		nvm->ops.read = e1000_read_nvm_spi;
+
+	nvm->ops.write = e1000_write_nvm_spi;
+	nvm->ops.validate = e1000_validate_nvm_checksum_generic;
+	nvm->ops.update = e1000_update_nvm_checksum_generic;
+	nvm->ops.valid_led_default = e1000_valid_led_default_82575;
+
+	/* override generic family function pointers for specific descendants */
+	switch (hw->mac.type) {
+	case e1000_82580:
+		nvm->ops.validate = e1000_validate_nvm_checksum_82580;
+		nvm->ops.update = e1000_update_nvm_checksum_82580;
+		break;
+	case e1000_i350:
+	//case e1000_i354:
+		nvm->ops.validate = e1000_validate_nvm_checksum_i350;
+		nvm->ops.update = e1000_update_nvm_checksum_i350;
+		break;
+	default:
+		break;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_init_mac_params_82575 - Init MAC func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_mac_params_82575(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+
+	DEBUGFUNC("e1000_init_mac_params_82575");
+
+	/* Derives media type */
+	e1000_get_media_type_82575(hw);
+	/* Set mta register count */
+	mac->mta_reg_count = 128;
+	/* Set uta register count */
+	mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128;
+	/* Set rar entry count */
+	mac->rar_entry_count = E1000_RAR_ENTRIES_82575;
+	if (mac->type == e1000_82576)
+		mac->rar_entry_count = E1000_RAR_ENTRIES_82576;
+	if (mac->type == e1000_82580)
+		mac->rar_entry_count = E1000_RAR_ENTRIES_82580;
+	if (mac->type == e1000_i350 || mac->type == e1000_i354)
+		mac->rar_entry_count = E1000_RAR_ENTRIES_I350;
+
+	/* Enable EEE default settings for EEE supported devices */
+	if (mac->type >= e1000_i350)
+		dev_spec->eee_disable = false;
+
+	/* Allow a single clear of the SW semaphore on I210 and newer */
+	if (mac->type >= e1000_i210)
+		dev_spec->clear_semaphore_once = true;
+
+	/* Set if part includes ASF firmware */
+	mac->asf_firmware_present = true;
+	/* FWSM register */
+	mac->has_fwsm = true;
+	/* ARC supported; valid only if manageability features are enabled. */
+	mac->arc_subsystem_valid =
+		!!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK);
+
+	/* Function pointers */
+
+	/* bus type/speed/width */
+	mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic;
+	/* reset */
+	if (mac->type >= e1000_82580)
+		mac->ops.reset_hw = e1000_reset_hw_82580;
+	else
+	mac->ops.reset_hw = e1000_reset_hw_82575;
+	/* hw initialization */
+	mac->ops.init_hw = e1000_init_hw_82575;
+	/* link setup */
+	mac->ops.setup_link = e1000_setup_link_generic;
+	/* physical interface link setup */
+	mac->ops.setup_physical_interface =
+		(hw->phy.media_type == e1000_media_type_copper)
+		? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575;
+	/* physical interface shutdown */
+	mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575;
+	/* physical interface power up */
+	mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575;
+	/* check for link */
+	mac->ops.check_for_link = e1000_check_for_link_82575;
+	/* read mac address */
+	mac->ops.read_mac_addr = e1000_read_mac_addr_82575;
+	/* configure collision distance */
+	mac->ops.config_collision_dist = e1000_config_collision_dist_82575;
+	/* multicast address update */
+	mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic;
+	if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) {
+		/* writing VFTA */
+		mac->ops.write_vfta = e1000_write_vfta_i350;
+		/* clearing VFTA */
+		mac->ops.clear_vfta = e1000_clear_vfta_i350;
+	} else {
+		/* writing VFTA */
+		mac->ops.write_vfta = e1000_write_vfta_generic;
+		/* clearing VFTA */
+		mac->ops.clear_vfta = e1000_clear_vfta_generic;
+	}
+	if (hw->mac.type >= e1000_82580)
+		mac->ops.validate_mdi_setting =
+				e1000_validate_mdi_setting_crossover_generic;
+	/* ID LED init */
+	mac->ops.id_led_init = e1000_id_led_init_generic;
+	/* blink LED */
+	mac->ops.blink_led = e1000_blink_led_generic;
+	/* setup LED */
+	mac->ops.setup_led = e1000_setup_led_generic;
+	/* cleanup LED */
+	mac->ops.cleanup_led = e1000_cleanup_led_generic;
+	/* turn on/off LED */
+	mac->ops.led_on = e1000_led_on_generic;
+	mac->ops.led_off = e1000_led_off_generic;
+	/* clear hardware counters */
+	mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575;
+	/* link info */
+	mac->ops.get_link_up_info = e1000_get_link_up_info_82575;
+	/* get thermal sensor data */
+	mac->ops.get_thermal_sensor_data =
+				e1000_get_thermal_sensor_data_generic;
+	mac->ops.init_thermal_sensor_thresh =
+				e1000_init_thermal_sensor_thresh_generic;
+	/* acquire SW_FW sync */
+	mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575;
+	mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575;
+	if (mac->type >= e1000_i210) {
+		mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210;
+		mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210;
+	}
+
+	/* set lan id for port to determine which phy lock to use */
+	hw->mac.ops.set_lan_id(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_init_function_pointers_82575 - Init func ptrs.
+ *  @hw: pointer to the HW structure
+ *
+ *  Called to initialize all function pointers and parameters.
+ **/
+void e1000_init_function_pointers_82575(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_init_function_pointers_82575");
+
+	hw->mac.ops.init_params = e1000_init_mac_params_82575;
+	hw->nvm.ops.init_params = e1000_init_nvm_params_82575;
+	hw->phy.ops.init_params = e1000_init_phy_params_82575;
+	hw->mbx.ops.init_params = e1000_init_mbx_params_pf;
+}
+
+/**
+ *  e1000_acquire_phy_82575 - Acquire rights to access PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire access rights to the correct PHY.
+ **/
+static s32 e1000_acquire_phy_82575(struct e1000_hw *hw)
+{
+	u16 mask = E1000_SWFW_PHY0_SM;
+
+	DEBUGFUNC("e1000_acquire_phy_82575");
+
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_SWFW_PHY1_SM;
+	else if (hw->bus.func == E1000_FUNC_2)
+		mask = E1000_SWFW_PHY2_SM;
+	else if (hw->bus.func == E1000_FUNC_3)
+		mask = E1000_SWFW_PHY3_SM;
+
+	return hw->mac.ops.acquire_swfw_sync(hw, mask);
+}
+
+/**
+ *  e1000_release_phy_82575 - Release rights to access PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  A wrapper to release access rights to the correct PHY.
+ **/
+static void e1000_release_phy_82575(struct e1000_hw *hw)
+{
+	u16 mask = E1000_SWFW_PHY0_SM;
+
+	DEBUGFUNC("e1000_release_phy_82575");
+
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_SWFW_PHY1_SM;
+	else if (hw->bus.func == E1000_FUNC_2)
+		mask = E1000_SWFW_PHY2_SM;
+	else if (hw->bus.func == E1000_FUNC_3)
+		mask = E1000_SWFW_PHY3_SM;
+
+	hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+/**
+ *  e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset using the serial gigabit media independent
+ *  interface and stores the retrieved information in data.
+ **/
+static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+					  u16 *data)
+{
+	s32 ret_val = -E1000_ERR_PARAM;
+
+	DEBUGFUNC("e1000_read_phy_reg_sgmii_82575");
+
+	if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
+		DEBUGOUT1("PHY Address %u is out of range\n", offset);
+		goto out;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_read_phy_reg_i2c(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset using the serial gigabit
+ *  media independent interface.
+ **/
+static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+					   u16 data)
+{
+	s32 ret_val = -E1000_ERR_PARAM;
+
+	DEBUGFUNC("e1000_write_phy_reg_sgmii_82575");
+
+	if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
+		DEBUGOUT1("PHY Address %d is out of range\n", offset);
+		goto out;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_write_phy_reg_i2c(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_get_phy_id_82575 - Retrieve PHY addr and id
+ *  @hw: pointer to the HW structure
+ *
+ *  Retrieves the PHY address and ID for both PHY's which do and do not use
+ *  sgmi interface.
+ **/
+static s32 e1000_get_phy_id_82575(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32  ret_val = E1000_SUCCESS;
+	u16 phy_id;
+	u32 ctrl_ext;
+	u32 mdic;
+
+	DEBUGFUNC("e1000_get_phy_id_82575");
+
+	/* i354 devices can have a PHY that needs an extra read for id */
+	if (hw->mac.type == e1000_i354)
+		e1000_get_phy_id(hw);
+
+
+	/*
+	 * For SGMII PHYs, we try the list of possible addresses until
+	 * we find one that works.  For non-SGMII PHYs
+	 * (e.g. integrated copper PHYs), an address of 1 should
+	 * work.  The result of this function should mean phy->phy_addr
+	 * and phy->id are set correctly.
+	 */
+	if (!e1000_sgmii_active_82575(hw)) {
+		phy->addr = 1;
+		ret_val = e1000_get_phy_id(hw);
+		goto out;
+	}
+
+	if (e1000_sgmii_uses_mdio_82575(hw)) {
+		switch (hw->mac.type) {
+		case e1000_82575:
+		case e1000_82576:
+			mdic = E1000_READ_REG(hw, E1000_MDIC);
+			mdic &= E1000_MDIC_PHY_MASK;
+			phy->addr = mdic >> E1000_MDIC_PHY_SHIFT;
+			break;
+		case e1000_82580:
+		case e1000_i350:
+		case e1000_i354:
+		case e1000_i210:
+		case e1000_i211:
+			mdic = E1000_READ_REG(hw, E1000_MDICNFG);
+			mdic &= E1000_MDICNFG_PHY_MASK;
+			phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT;
+			break;
+		default:
+			ret_val = -E1000_ERR_PHY;
+			goto out;
+			break;
+		}
+		ret_val = e1000_get_phy_id(hw);
+		goto out;
+	}
+
+	/* Power on sgmii phy if it is disabled */
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT,
+			ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA);
+	E1000_WRITE_FLUSH(hw);
+	msec_delay(300);
+
+	/*
+	 * The address field in the I2CCMD register is 3 bits and 0 is invalid.
+	 * Therefore, we need to test 1-7
+	 */
+	for (phy->addr = 1; phy->addr < 8; phy->addr++) {
+		ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id);
+		if (ret_val == E1000_SUCCESS) {
+			DEBUGOUT2("Vendor ID 0x%08X read at address %u\n",
+				  phy_id, phy->addr);
+			/*
+			 * At the time of this writing, The M88 part is
+			 * the only supported SGMII PHY product.
+			 */
+			if (phy_id == M88_VENDOR)
+				break;
+		} else {
+			DEBUGOUT1("PHY address %u was unreadable\n",
+				  phy->addr);
+		}
+	}
+
+	/* A valid PHY type couldn't be found. */
+	if (phy->addr == 8) {
+		phy->addr = 0;
+		ret_val = -E1000_ERR_PHY;
+	} else {
+		ret_val = e1000_get_phy_id(hw);
+	}
+
+	/* restore previous sfp cage power state */
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Resets the PHY using the serial gigabit media independent interface.
+ **/
+static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575");
+
+	/*
+	 * This isn't a true "hard" reset, but is the only reset
+	 * available to us at this time.
+	 */
+
+	DEBUGOUT("Soft resetting SGMII attached PHY...\n");
+
+	if (!(hw->phy.ops.write_reg))
+		goto out;
+
+	/*
+	 * SFP documentation requires the following to configure the SPF module
+	 * to work on SGMII.  No further documentation is given.
+	 */
+	ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084);
+	if (ret_val)
+		goto out;
+
+	ret_val = hw->phy.ops.commit(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.  When
+ *  activating LPLU this function also disables smart speed
+ *  and vice versa.  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u16 data;
+
+	DEBUGFUNC("e1000_set_d0_lplu_state_82575");
+
+	if (!(hw->phy.ops.read_reg))
+		goto out;
+
+	ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+	if (ret_val)
+		goto out;
+
+	if (active) {
+		data |= IGP02E1000_PM_D0_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+					     data);
+		if (ret_val)
+			goto out;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					    &data);
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					     data);
+		if (ret_val)
+			goto out;
+	} else {
+		data &= ~IGP02E1000_PM_D0_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+					     data);
+		/*
+		 * LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				goto out;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				goto out;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				goto out;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.  When
+ *  activating LPLU this function also disables smart speed
+ *  and vice versa.  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u32 data;
+
+	DEBUGFUNC("e1000_set_d0_lplu_state_82580");
+
+	data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+
+	if (active) {
+		data |= E1000_82580_PM_D0_LPLU;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		data &= ~E1000_82580_PM_SPD;
+	} else {
+		data &= ~E1000_82580_PM_D0_LPLU;
+
+		/*
+		 * LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on)
+			data |= E1000_82580_PM_SPD;
+		else if (phy->smart_speed == e1000_smart_speed_off)
+			data &= ~E1000_82580_PM_SPD;
+	}
+
+	E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data);
+	return ret_val;
+}
+
+/**
+ *  e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D3
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.
+ **/
+s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u32 data;
+
+	DEBUGFUNC("e1000_set_d3_lplu_state_82580");
+
+	data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+
+	if (!active) {
+		data &= ~E1000_82580_PM_D3_LPLU;
+		/*
+		 * LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on)
+			data |= E1000_82580_PM_SPD;
+		else if (phy->smart_speed == e1000_smart_speed_off)
+			data &= ~E1000_82580_PM_SPD;
+	} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+		data |= E1000_82580_PM_D3_LPLU;
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		data &= ~E1000_82580_PM_SPD;
+	}
+
+	E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data);
+	return ret_val;
+}
+
+/**
+ *  e1000_acquire_nvm_82575 - Request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the necessary semaphores for exclusive access to the EEPROM.
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_acquire_nvm_82575");
+
+	ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+	if (ret_val)
+		goto out;
+
+	/*
+	 * Check if there is some access
+	 * error this access may hook on
+	 */
+	if (hw->mac.type == e1000_i350) {
+		u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+		if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT |
+		    E1000_EECD_TIMEOUT)) {
+			/* Clear all access error flags */
+			E1000_WRITE_REG(hw, E1000_EECD, eecd |
+					E1000_EECD_ERROR_CLR);
+			DEBUGOUT("Nvm bit banging access error detected and cleared.\n");
+		}
+	}
+	if (hw->mac.type == e1000_82580) {
+		u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+		if (eecd & E1000_EECD_BLOCKED) {
+			/* Clear access error flag */
+			E1000_WRITE_REG(hw, E1000_EECD, eecd |
+					E1000_EECD_BLOCKED);
+			DEBUGOUT("Nvm bit banging access error detected and cleared.\n");
+		}
+	}
+
+
+	ret_val = e1000_acquire_nvm_generic(hw);
+	if (ret_val)
+		e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_release_nvm_82575 - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ *  then release the semaphores acquired.
+ **/
+static void e1000_release_nvm_82575(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_release_nvm_82575");
+
+	e1000_release_nvm_generic(hw);
+
+	e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ *  e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ *  will also specify which port we're acquiring the lock for.
+ **/
+static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 ret_val = E1000_SUCCESS;
+	s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+
+	DEBUGFUNC("e1000_acquire_swfw_sync_82575");
+
+	while (i < timeout) {
+		if (e1000_get_hw_semaphore_generic(hw)) {
+			ret_val = -E1000_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/*
+		 * Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		e1000_put_hw_semaphore_generic(hw);
+		msec_delay_irq(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -E1000_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_release_swfw_sync_82575 - Release SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ *  will also specify which port we're releasing the lock for.
+ **/
+static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	DEBUGFUNC("e1000_release_swfw_sync_82575");
+
+	while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS)
+		; /* Empty */
+
+	swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+}
+
+/**
+ *  e1000_get_cfg_done_82575 - Read config done bit
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the management control register for the config done bit for
+ *  completion status.  NOTE: silicon which is EEPROM-less will fail trying
+ *  to read the config done bit, so an error is *ONLY* logged and returns
+ *  E1000_SUCCESS.  If we were to return with error, EEPROM-less silicon
+ *  would not be able to be reset or change link.
+ **/
+static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw)
+{
+	s32 timeout = PHY_CFG_TIMEOUT;
+	s32 ret_val = E1000_SUCCESS;
+	u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+	DEBUGFUNC("e1000_get_cfg_done_82575");
+
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_NVM_CFG_DONE_PORT_1;
+	else if (hw->bus.func == E1000_FUNC_2)
+		mask = E1000_NVM_CFG_DONE_PORT_2;
+	else if (hw->bus.func == E1000_FUNC_3)
+		mask = E1000_NVM_CFG_DONE_PORT_3;
+	while (timeout) {
+		if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask)
+			break;
+		msec_delay(1);
+		timeout--;
+	}
+	if (!timeout)
+		DEBUGOUT("MNG configuration cycle has not completed.\n");
+
+	/* If EEPROM is not marked present, init the PHY manually */
+	if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) &&
+	    (hw->phy.type == e1000_phy_igp_3))
+		e1000_phy_init_script_igp3(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_link_up_info_82575 - Get link speed/duplex info
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  This is a wrapper function, if using the serial gigabit media independent
+ *  interface, use PCS to retrieve the link speed and duplex information.
+ *  Otherwise, use the generic function to get the link speed and duplex info.
+ **/
+static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
+					u16 *duplex)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_get_link_up_info_82575");
+
+	if (hw->phy.media_type != e1000_media_type_copper)
+		ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed,
+							       duplex);
+	else
+		ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed,
+								    duplex);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_link_82575 - Check for link
+ *  @hw: pointer to the HW structure
+ *
+ *  If sgmii is enabled, then use the pcs register to determine link, otherwise
+ *  use the generic interface for determining link.
+ **/
+static s32 e1000_check_for_link_82575(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 speed, duplex;
+
+	DEBUGFUNC("e1000_check_for_link_82575");
+
+	if (hw->phy.media_type != e1000_media_type_copper) {
+		ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed,
+							       &duplex);
+		/*
+		 * Use this flag to determine if link needs to be checked or
+		 * not.  If we have link clear the flag so that we do not
+		 * continue to check for link.
+		 */
+		hw->mac.get_link_status = !hw->mac.serdes_has_link;
+
+		/*
+		 * Configure Flow Control now that Auto-Neg has completed.
+		 * First, we need to restore the desired flow control
+		 * settings because we may have had to re-autoneg with a
+		 * different link partner.
+		 */
+		ret_val = e1000_config_fc_after_link_up_generic(hw);
+		if (ret_val)
+			DEBUGOUT("Error configuring flow control\n");
+	} else {
+		ret_val = e1000_check_for_copper_link_generic(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_link_media_swap - Check which M88E1112 interface linked
+ *  @hw: pointer to the HW structure
+ *
+ *  Poll the M88E1112 interfaces to see which interface achieved link.
+ */
+static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	u8 port = 0;
+
+	DEBUGFUNC("e1000_check_for_link_media_swap");
+
+	/* Check the copper medium. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (data & E1000_M88E1112_STATUS_LINK)
+		port = E1000_MEDIA_PORT_COPPER;
+
+	/* Check the other medium. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (data & E1000_M88E1112_STATUS_LINK)
+		port = E1000_MEDIA_PORT_OTHER;
+
+	/* Determine if a swap needs to happen. */
+	if (port && (hw->dev_spec._82575.media_port != port)) {
+		hw->dev_spec._82575.media_port = port;
+		hw->dev_spec._82575.media_changed = true;
+	} else {
+		ret_val = e1000_check_for_link_82575(hw);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown
+ *  @hw: pointer to the HW structure
+ **/
+static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw)
+{
+	u32 reg;
+
+	DEBUGFUNC("e1000_power_up_serdes_link_82575");
+
+	if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+	    !e1000_sgmii_active_82575(hw))
+		return;
+
+	/* Enable PCS to turn on link */
+	reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
+	reg |= E1000_PCS_CFG_PCS_EN;
+	E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
+
+	/* Power up the laser */
+	reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	reg &= ~E1000_CTRL_EXT_SDP3_DATA;
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
+
+	/* flush the write to verify completion */
+	E1000_WRITE_FLUSH(hw);
+	msec_delay(1);
+}
+
+/**
+ *  e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Using the physical coding sub-layer (PCS), retrieve the current speed and
+ *  duplex, then store the values in the pointers provided.
+ **/
+static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
+						u16 *speed, u16 *duplex)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 pcs;
+	u32 status;
+
+	DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575");
+
+	/*
+	 * Read the PCS Status register for link state. For non-copper mode,
+	 * the status register is not accurate. The PCS status register is
+	 * used instead.
+	 */
+	pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT);
+
+	/*
+	 * The link up bit determines when link is up on autoneg.
+	 */
+	if (pcs & E1000_PCS_LSTS_LINK_OK) {
+		mac->serdes_has_link = true;
+
+		/* Detect and store PCS speed */
+		if (pcs & E1000_PCS_LSTS_SPEED_1000)
+			*speed = SPEED_1000;
+		else if (pcs & E1000_PCS_LSTS_SPEED_100)
+			*speed = SPEED_100;
+		else
+			*speed = SPEED_10;
+
+		/* Detect and store PCS duplex */
+		if (pcs & E1000_PCS_LSTS_DUPLEX_FULL)
+			*duplex = FULL_DUPLEX;
+		else
+			*duplex = HALF_DUPLEX;
+
+		/* Check if it is an I354 2.5Gb backplane connection. */
+		if (mac->type == e1000_i354) {
+			status = E1000_READ_REG(hw, E1000_STATUS);
+			if ((status & E1000_STATUS_2P5_SKU) &&
+			    !(status & E1000_STATUS_2P5_SKU_OVER)) {
+				*speed = SPEED_2500;
+				*duplex = FULL_DUPLEX;
+				DEBUGOUT("2500 Mbs, ");
+				DEBUGOUT("Full Duplex\n");
+			}
+		}
+
+	} else {
+		mac->serdes_has_link = false;
+		*speed = 0;
+		*duplex = 0;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_shutdown_serdes_link_82575 - Remove link during power down
+ *  @hw: pointer to the HW structure
+ *
+ *  In the case of serdes shut down sfp and PCS on driver unload
+ *  when management pass thru is not enabled.
+ **/
+void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw)
+{
+	u32 reg;
+
+	DEBUGFUNC("e1000_shutdown_serdes_link_82575");
+
+	if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+	    !e1000_sgmii_active_82575(hw))
+		return;
+
+	if (!e1000_enable_mng_pass_thru(hw)) {
+		/* Disable PCS to turn off link */
+		reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
+		reg &= ~E1000_PCS_CFG_PCS_EN;
+		E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
+
+		/* shutdown the laser */
+		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+		reg |= E1000_CTRL_EXT_SDP3_DATA;
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
+
+		/* flush the write to verify completion */
+		E1000_WRITE_FLUSH(hw);
+		msec_delay(1);
+	}
+
+	return;
+}
+
+/**
+ *  e1000_reset_hw_82575 - Reset hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets the hardware into a known state.
+ **/
+static s32 e1000_reset_hw_82575(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_reset_hw_82575");
+
+	/*
+	 * Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = e1000_disable_pcie_master_generic(hw);
+	if (ret_val)
+		DEBUGOUT("PCI-E Master disable polling has failed.\n");
+
+	/* set the completion timeout for interface */
+	ret_val = e1000_set_pcie_completion_timeout(hw);
+	if (ret_val)
+		DEBUGOUT("PCI-E Set completion timeout has failed.\n");
+
+	DEBUGOUT("Masking off all interrupts\n");
+	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+
+	E1000_WRITE_REG(hw, E1000_RCTL, 0);
+	E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP);
+	E1000_WRITE_FLUSH(hw);
+
+	msec_delay(10);
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+	DEBUGOUT("Issuing a global reset to MAC\n");
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST);
+
+	ret_val = e1000_get_auto_rd_done_generic(hw);
+	if (ret_val) {
+		/*
+		 * When auto config read does not complete, do not
+		 * return with an error. This can happen in situations
+		 * where there is no eeprom and prevents getting link.
+		 */
+		DEBUGOUT("Auto Read Done did not complete\n");
+	}
+
+	/* If EEPROM is not present, run manual init scripts */
+	if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES))
+		e1000_reset_init_script_82575(hw);
+
+	/* Clear any pending interrupt events. */
+	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+	E1000_READ_REG(hw, E1000_ICR);
+
+	/* Install any alternate MAC address into RAR0 */
+	ret_val = e1000_check_alt_mac_addr_generic(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_hw_82575 - Initialize hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This inits the hardware readying it for operation.
+ **/
+static s32 e1000_init_hw_82575(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	u16 i, rar_count = mac->rar_entry_count;
+
+	DEBUGFUNC("e1000_init_hw_82575");
+
+	/* Initialize identification LED */
+	ret_val = mac->ops.id_led_init(hw);
+	if (ret_val) {
+		DEBUGOUT("Error initializing identification LED\n");
+		/* This is not fatal and we should not stop init due to this */
+	}
+
+	/* Disabling VLAN filtering */
+	DEBUGOUT("Initializing the IEEE VLAN\n");
+	mac->ops.clear_vfta(hw);
+
+	/* Setup the receive address */
+	e1000_init_rx_addrs_generic(hw, rar_count);
+
+	/* Zero out the Multicast HASH table */
+	DEBUGOUT("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
+
+	/* Zero out the Unicast HASH table */
+	DEBUGOUT("Zeroing the UTA\n");
+	for (i = 0; i < mac->uta_reg_count; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0);
+
+	/* Setup link and flow control */
+	ret_val = mac->ops.setup_link(hw);
+
+	/* Set the default MTU size */
+	hw->dev_spec._82575.mtu = 1500;
+
+	/*
+	 * Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	e1000_clear_hw_cntrs_82575(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_setup_copper_link_82575 - Configure copper link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the link for auto-neg or forced speed and duplex.  Then we check
+ *  for link, once link is established calls to configure collision distance
+ *  and flow control are called.
+ **/
+static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u32 phpm_reg;
+
+	DEBUGFUNC("e1000_setup_copper_link_82575");
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+	ctrl |= E1000_CTRL_SLU;
+	ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+	/* Clear Go Link Disconnect bit on supported devices */
+	switch (hw->mac.type) {
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i210:
+	case e1000_i211:
+		phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT);
+		phpm_reg &= ~E1000_82580_PM_GO_LINKD;
+		E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg);
+		break;
+	default:
+		break;
+	}
+
+	ret_val = e1000_setup_serdes_link_82575(hw);
+	if (ret_val)
+		goto out;
+
+	if (e1000_sgmii_active_82575(hw) && !hw->phy.reset_disable) {
+		/* allow time for SFP cage time to power up phy */
+		msec_delay(300);
+
+		ret_val = hw->phy.ops.reset(hw);
+		if (ret_val) {
+			DEBUGOUT("Error resetting the PHY.\n");
+			goto out;
+		}
+	}
+	switch (hw->phy.type) {
+	case e1000_phy_i210:
+	case e1000_phy_m88:
+		switch (hw->phy.id) {
+		case I347AT4_E_PHY_ID:
+		case M88E1112_E_PHY_ID:
+		case M88E1340M_E_PHY_ID:
+		case M88E1543_E_PHY_ID:
+		case I210_I_PHY_ID:
+			ret_val = e1000_copper_link_setup_m88_gen2(hw);
+			break;
+		default:
+			ret_val = e1000_copper_link_setup_m88(hw);
+			break;
+		}
+		break;
+	case e1000_phy_igp_3:
+		ret_val = e1000_copper_link_setup_igp(hw);
+		break;
+	case e1000_phy_82580:
+		ret_val = e1000_copper_link_setup_82577(hw);
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		break;
+	}
+
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_setup_copper_link_generic(hw);
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_setup_serdes_link_82575 - Setup link for serdes
+ *  @hw: pointer to the HW structure
+ *
+ *  Configure the physical coding sub-layer (PCS) link.  The PCS link is
+ *  used on copper connections where the serialized gigabit media independent
+ *  interface (sgmii), or serdes fiber is being used.  Configures the link
+ *  for auto-negotiation or forces speed/duplex.
+ **/
+static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw)
+{
+	u32 ctrl_ext, ctrl_reg, reg, anadv_reg;
+	bool pcs_autoneg;
+	s32 ret_val = E1000_SUCCESS;
+	u16 data;
+
+	DEBUGFUNC("e1000_setup_serdes_link_82575");
+
+	if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+	    !e1000_sgmii_active_82575(hw))
+		return ret_val;
+
+	/*
+	 * On the 82575, SerDes loopback mode persists until it is
+	 * explicitly turned off or a power cycle is performed.  A read to
+	 * the register does not indicate its status.  Therefore, we ensure
+	 * loopback mode is disabled during initialization.
+	 */
+	E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
+
+	/* power on the sfp cage if present */
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+
+	ctrl_reg = E1000_READ_REG(hw, E1000_CTRL);
+	ctrl_reg |= E1000_CTRL_SLU;
+
+	/* set both sw defined pins on 82575/82576*/
+	if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576)
+		ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1;
+
+	reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
+
+	/* default pcs_autoneg to the same setting as mac autoneg */
+	pcs_autoneg = hw->mac.autoneg;
+
+	switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) {
+	case E1000_CTRL_EXT_LINK_MODE_SGMII:
+		/* sgmii mode lets the phy handle forcing speed/duplex */
+		pcs_autoneg = true;
+		/* autoneg time out should be disabled for SGMII mode */
+		reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT);
+		break;
+	case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
+		/* disable PCS autoneg and support parallel detect only */
+		pcs_autoneg = false;
+		/* fall through to default case */
+	default:
+		if (hw->mac.type == e1000_82575 ||
+		    hw->mac.type == e1000_82576) {
+			ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data);
+			if (ret_val) {
+				DEBUGOUT("NVM Read Error\n");
+				return ret_val;
+			}
+
+			if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT)
+				pcs_autoneg = false;
+		}
+
+		/*
+		 * non-SGMII modes only supports a speed of 1000/Full for the
+		 * link so it is best to just force the MAC and let the pcs
+		 * link either autoneg or be forced to 1000/Full
+		 */
+		ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD |
+			    E1000_CTRL_FD | E1000_CTRL_FRCDPX;
+
+		/* set speed of 1000/Full if speed/duplex is forced */
+		reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL;
+		break;
+	}
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg);
+
+	/*
+	 * New SerDes mode allows for forcing speed or autonegotiating speed
+	 * at 1gb. Autoneg should be default set by most drivers. This is the
+	 * mode that will be compatible with older link partners and switches.
+	 * However, both are supported by the hardware and some drivers/tools.
+	 */
+	reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP |
+		 E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK);
+
+	if (pcs_autoneg) {
+		/* Set PCS register for autoneg */
+		reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */
+		       E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */
+
+		/* Disable force flow control for autoneg */
+		reg &= ~E1000_PCS_LCTL_FORCE_FCTRL;
+
+		/* Configure flow control advertisement for autoneg */
+		anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV);
+		anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE);
+
+		switch (hw->fc.requested_mode) {
+		case e1000_fc_full:
+		case e1000_fc_rx_pause:
+			anadv_reg |= E1000_TXCW_ASM_DIR;
+			anadv_reg |= E1000_TXCW_PAUSE;
+			break;
+		case e1000_fc_tx_pause:
+			anadv_reg |= E1000_TXCW_ASM_DIR;
+			break;
+		default:
+			break;
+		}
+
+		E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg);
+
+		DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg);
+	} else {
+		/* Set PCS register for forced link */
+		reg |= E1000_PCS_LCTL_FSD;	/* Force Speed */
+
+		/* Force flow control for forced link */
+		reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+
+		DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg);
+	}
+
+	E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg);
+
+	if (!pcs_autoneg && !e1000_sgmii_active_82575(hw))
+		e1000_force_mac_fc_generic(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_media_type_82575 - derives current media type.
+ *  @hw: pointer to the HW structure
+ *
+ *  The media type is chosen reflecting few settings.
+ *  The following are taken into account:
+ *  - link mode set in the current port Init Control Word #3
+ *  - current link mode settings in CSR register
+ *  - MDIO vs. I2C PHY control interface chosen
+ *  - SFP module media type
+ **/
+static s32 e1000_get_media_type_82575(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	s32 ret_val = E1000_SUCCESS;
+	u32 ctrl_ext = 0;
+	u32 link_mode = 0;
+
+	/* Set internal phy as default */
+	dev_spec->sgmii_active = false;
+	dev_spec->module_plugged = false;
+
+	/* Get CSR setting */
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+
+	/* extract link mode setting */
+	link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK;
+
+	switch (link_mode) {
+	case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
+		hw->phy.media_type = e1000_media_type_internal_serdes;
+		break;
+	case E1000_CTRL_EXT_LINK_MODE_GMII:
+		hw->phy.media_type = e1000_media_type_copper;
+		break;
+	case E1000_CTRL_EXT_LINK_MODE_SGMII:
+		/* Get phy control interface type set (MDIO vs. I2C)*/
+		if (e1000_sgmii_uses_mdio_82575(hw)) {
+			hw->phy.media_type = e1000_media_type_copper;
+			dev_spec->sgmii_active = true;
+			break;
+		}
+		/* fall through for I2C based SGMII */
+	case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
+		/* read media type from SFP EEPROM */
+		ret_val = e1000_set_sfp_media_type_82575(hw);
+		if ((ret_val != E1000_SUCCESS) ||
+		    (hw->phy.media_type == e1000_media_type_unknown)) {
+			/*
+			 * If media type was not identified then return media
+			 * type defined by the CTRL_EXT settings.
+			 */
+			hw->phy.media_type = e1000_media_type_internal_serdes;
+
+			if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) {
+				hw->phy.media_type = e1000_media_type_copper;
+				dev_spec->sgmii_active = true;
+			}
+
+			break;
+		}
+
+		/* do not change link mode for 100BaseFX */
+		if (dev_spec->eth_flags.e100_base_fx)
+			break;
+
+		/* change current link mode setting */
+		ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
+
+		if (hw->phy.media_type == e1000_media_type_copper)
+			ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII;
+		else
+			ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_set_sfp_media_type_82575 - derives SFP module media type.
+ *  @hw: pointer to the HW structure
+ *
+ *  The media type is chosen based on SFP module.
+ *  compatibility flags retrieved from SFP ID EEPROM.
+ **/
+static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_ERR_CONFIG;
+	u32 ctrl_ext = 0;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags;
+	u8 tranceiver_type = 0;
+	s32 timeout = 3;
+
+	/* Turn I2C interface ON and power on sfp cage */
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA);
+
+	E1000_WRITE_FLUSH(hw);
+
+	/* Read SFP module data */
+	while (timeout) {
+		ret_val = e1000_read_sfp_data_byte(hw,
+			E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET),
+			&tranceiver_type);
+		if (ret_val == E1000_SUCCESS)
+			break;
+		msec_delay(100);
+		timeout--;
+	}
+	if (ret_val != E1000_SUCCESS)
+		goto out;
+
+	ret_val = e1000_read_sfp_data_byte(hw,
+			E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET),
+			(u8 *)eth_flags);
+	if (ret_val != E1000_SUCCESS)
+		goto out;
+
+	/* Check if there is some SFP module plugged and powered */
+	if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) ||
+	    (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) {
+		dev_spec->module_plugged = true;
+		if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) {
+			hw->phy.media_type = e1000_media_type_internal_serdes;
+		} else if (eth_flags->e100_base_fx) {
+			dev_spec->sgmii_active = true;
+			hw->phy.media_type = e1000_media_type_internal_serdes;
+		} else if (eth_flags->e1000_base_t) {
+			dev_spec->sgmii_active = true;
+			hw->phy.media_type = e1000_media_type_copper;
+		} else {
+			hw->phy.media_type = e1000_media_type_unknown;
+			DEBUGOUT("PHY module has not been recognized\n");
+			goto out;
+		}
+	} else {
+		hw->phy.media_type = e1000_media_type_unknown;
+	}
+	ret_val = E1000_SUCCESS;
+out:
+	/* Restore I2C interface setting */
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+	return ret_val;
+}
+
+/**
+ *  e1000_valid_led_default_82575 - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_valid_led_default_82575");
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		goto out;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
+		switch (hw->phy.media_type) {
+		case e1000_media_type_internal_serdes:
+			*data = ID_LED_DEFAULT_82575_SERDES;
+			break;
+		case e1000_media_type_copper:
+		default:
+			*data = ID_LED_DEFAULT;
+			break;
+		}
+	}
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_sgmii_active_82575 - Return sgmii state
+ *  @hw: pointer to the HW structure
+ *
+ *  82575 silicon has a serialized gigabit media independent interface (sgmii)
+ *  which can be enabled for use in the embedded applications.  Simply
+ *  return the current state of the sgmii interface.
+ **/
+static bool e1000_sgmii_active_82575(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	return dev_spec->sgmii_active;
+}
+
+/**
+ *  e1000_reset_init_script_82575 - Inits HW defaults after reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Inits recommended HW defaults after a reset when there is no EEPROM
+ *  detected. This is only for the 82575.
+ **/
+static s32 e1000_reset_init_script_82575(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_reset_init_script_82575");
+
+	if (hw->mac.type == e1000_82575) {
+		DEBUGOUT("Running reset init script for 82575\n");
+		/* SerDes configuration via SERDESCTRL */
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15);
+
+		/* CCM configuration via CCMCTL register */
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00);
+
+		/* PCIe lanes configuration */
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81);
+
+		/* PCIe PLL Configuration */
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00);
+		e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_mac_addr_82575 - Read device MAC address
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_read_mac_addr_82575");
+
+	/*
+	 * If there's an alternate MAC address place it in RAR0
+	 * so that it will override the Si installed default perm
+	 * address.
+	 */
+	ret_val = e1000_check_alt_mac_addr_generic(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_read_mac_addr_generic(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_config_collision_dist_82575 - Configure collision distance
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the collision distance to the default value and is used
+ *  during link setup.
+ **/
+static void e1000_config_collision_dist_82575(struct e1000_hw *hw)
+{
+	u32 tctl_ext;
+
+	DEBUGFUNC("e1000_config_collision_dist_82575");
+
+	tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT);
+
+	tctl_ext &= ~E1000_TCTL_EXT_COLD;
+	tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT;
+
+	E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * e1000_power_down_phy_copper_82575 - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ **/
+static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+
+	if (!(phy->ops.check_reset_block))
+		return;
+
+	/* If the management interface is not enabled, then power down */
+	if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw)))
+		e1000_power_down_phy_copper(hw);
+
+	return;
+}
+
+/**
+ *  e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the hardware counters by reading the counter registers.
+ **/
+static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_clear_hw_cntrs_82575");
+
+	e1000_clear_hw_cntrs_base_generic(hw);
+
+	E1000_READ_REG(hw, E1000_PRC64);
+	E1000_READ_REG(hw, E1000_PRC127);
+	E1000_READ_REG(hw, E1000_PRC255);
+	E1000_READ_REG(hw, E1000_PRC511);
+	E1000_READ_REG(hw, E1000_PRC1023);
+	E1000_READ_REG(hw, E1000_PRC1522);
+	E1000_READ_REG(hw, E1000_PTC64);
+	E1000_READ_REG(hw, E1000_PTC127);
+	E1000_READ_REG(hw, E1000_PTC255);
+	E1000_READ_REG(hw, E1000_PTC511);
+	E1000_READ_REG(hw, E1000_PTC1023);
+	E1000_READ_REG(hw, E1000_PTC1522);
+
+	E1000_READ_REG(hw, E1000_ALGNERRC);
+	E1000_READ_REG(hw, E1000_RXERRC);
+	E1000_READ_REG(hw, E1000_TNCRS);
+	E1000_READ_REG(hw, E1000_CEXTERR);
+	E1000_READ_REG(hw, E1000_TSCTC);
+	E1000_READ_REG(hw, E1000_TSCTFC);
+
+	E1000_READ_REG(hw, E1000_MGTPRC);
+	E1000_READ_REG(hw, E1000_MGTPDC);
+	E1000_READ_REG(hw, E1000_MGTPTC);
+
+	E1000_READ_REG(hw, E1000_IAC);
+	E1000_READ_REG(hw, E1000_ICRXOC);
+
+	E1000_READ_REG(hw, E1000_ICRXPTC);
+	E1000_READ_REG(hw, E1000_ICRXATC);
+	E1000_READ_REG(hw, E1000_ICTXPTC);
+	E1000_READ_REG(hw, E1000_ICTXATC);
+	E1000_READ_REG(hw, E1000_ICTXQEC);
+	E1000_READ_REG(hw, E1000_ICTXQMTC);
+	E1000_READ_REG(hw, E1000_ICRXDMTC);
+
+	E1000_READ_REG(hw, E1000_CBTMPC);
+	E1000_READ_REG(hw, E1000_HTDPMC);
+	E1000_READ_REG(hw, E1000_CBRMPC);
+	E1000_READ_REG(hw, E1000_RPTHC);
+	E1000_READ_REG(hw, E1000_HGPTC);
+	E1000_READ_REG(hw, E1000_HTCBDPC);
+	E1000_READ_REG(hw, E1000_HGORCL);
+	E1000_READ_REG(hw, E1000_HGORCH);
+	E1000_READ_REG(hw, E1000_HGOTCL);
+	E1000_READ_REG(hw, E1000_HGOTCH);
+	E1000_READ_REG(hw, E1000_LENERRS);
+
+	/* This register should not be read in copper configurations */
+	if ((hw->phy.media_type == e1000_media_type_internal_serdes) ||
+	    e1000_sgmii_active_82575(hw))
+		E1000_READ_REG(hw, E1000_SCVPC);
+}
+
+/**
+ *  e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable
+ *  @hw: pointer to the HW structure
+ *
+ *  After rx enable if managability is enabled then there is likely some
+ *  bad data at the start of the fifo and possibly in the DMA fifo.  This
+ *  function clears the fifos and flushes any packets that came in as rx was
+ *  being enabled.
+ **/
+void e1000_rx_fifo_flush_82575(struct e1000_hw *hw)
+{
+	u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
+	int i, ms_wait;
+
+	DEBUGFUNC("e1000_rx_fifo_workaround_82575");
+	if (hw->mac.type != e1000_82575 ||
+	    !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN))
+		return;
+
+	/* Disable all Rx queues */
+	for (i = 0; i < 4; i++) {
+		rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i));
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i),
+				rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE);
+	}
+	/* Poll all queues to verify they have shut down */
+	for (ms_wait = 0; ms_wait < 10; ms_wait++) {
+		msec_delay(1);
+		rx_enabled = 0;
+		for (i = 0; i < 4; i++)
+			rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i));
+		if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE))
+			break;
+	}
+
+	if (ms_wait == 10)
+		DEBUGOUT("Queue disable timed out after 10ms\n");
+
+	/* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
+	 * incoming packets are rejected.  Set enable and wait 2ms so that
+	 * any packet that was coming in as RCTL.EN was set is flushed
+	 */
+	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
+
+	rlpml = E1000_READ_REG(hw, E1000_RLPML);
+	E1000_WRITE_REG(hw, E1000_RLPML, 0);
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP);
+	temp_rctl |= E1000_RCTL_LPE;
+
+	E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl);
+	E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN);
+	E1000_WRITE_FLUSH(hw);
+	msec_delay(2);
+
+	/* Enable Rx queues that were previously enabled and restore our
+	 * previous state
+	 */
+	for (i = 0; i < 4; i++)
+		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+	E1000_WRITE_FLUSH(hw);
+
+	E1000_WRITE_REG(hw, E1000_RLPML, rlpml);
+	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
+
+	/* Flush receive errors generated by workaround */
+	E1000_READ_REG(hw, E1000_ROC);
+	E1000_READ_REG(hw, E1000_RNBC);
+	E1000_READ_REG(hw, E1000_MPC);
+}
+
+/**
+ *  e1000_set_pcie_completion_timeout - set pci-e completion timeout
+ *  @hw: pointer to the HW structure
+ *
+ *  The defaults for 82575 and 82576 should be in the range of 50us to 50ms,
+ *  however the hardware default for these parts is 500us to 1ms which is less
+ *  than the 10ms recommended by the pci-e spec.  To address this we need to
+ *  increase the value to either 10ms to 200ms for capability version 1 config,
+ *  or 16ms to 55ms for version 2.
+ **/
+static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw)
+{
+	u32 gcr = E1000_READ_REG(hw, E1000_GCR);
+	s32 ret_val = E1000_SUCCESS;
+	u16 pcie_devctl2;
+
+	/* only take action if timeout value is defaulted to 0 */
+	if (gcr & E1000_GCR_CMPL_TMOUT_MASK)
+		goto out;
+
+	/*
+	 * if capababilities version is type 1 we can write the
+	 * timeout of 10ms to 200ms through the GCR register
+	 */
+	if (!(gcr & E1000_GCR_CAP_VER2)) {
+		gcr |= E1000_GCR_CMPL_TMOUT_10ms;
+		goto out;
+	}
+
+	/*
+	 * for version 2 capabilities we need to write the config space
+	 * directly in order to set the completion timeout value for
+	 * 16ms to 55ms
+	 */
+	ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+					  &pcie_devctl2);
+	if (ret_val)
+		goto out;
+
+	pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
+
+	ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+					   &pcie_devctl2);
+out:
+	/* disable completion timeout resend */
+	gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND;
+
+	E1000_WRITE_REG(hw, E1000_GCR, gcr);
+	return ret_val;
+}
+
+/**
+ *  e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing
+ *  @hw: pointer to the hardware struct
+ *  @enable: state to enter, either enabled or disabled
+ *  @pf: Physical Function pool - do not set anti-spoofing for the PF
+ *
+ *  enables/disables L2 switch anti-spoofing functionality.
+ **/
+void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf)
+{
+	u32 reg_val, reg_offset;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		reg_offset = E1000_DTXSWC;
+		break;
+	case e1000_i350:
+	case e1000_i354:
+		reg_offset = E1000_TXSWC;
+		break;
+	default:
+		return;
+	}
+
+	reg_val = E1000_READ_REG(hw, reg_offset);
+	if (enable) {
+		reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK |
+			     E1000_DTXSWC_VLAN_SPOOF_MASK);
+		/* The PF can spoof - it has to in order to
+		 * support emulation mode NICs
+		 */
+		reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS));
+	} else {
+		reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK |
+			     E1000_DTXSWC_VLAN_SPOOF_MASK);
+	}
+	E1000_WRITE_REG(hw, reg_offset, reg_val);
+}
+
+/**
+ *  e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback
+ *  @hw: pointer to the hardware struct
+ *  @enable: state to enter, either enabled or disabled
+ *
+ *  enables/disables L2 switch loopback functionality.
+ **/
+void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable)
+{
+	u32 dtxswc;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		dtxswc = E1000_READ_REG(hw, E1000_DTXSWC);
+		if (enable)
+			dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		else
+			dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc);
+		break;
+	case e1000_i350:
+	case e1000_i354:
+		dtxswc = E1000_READ_REG(hw, E1000_TXSWC);
+		if (enable)
+			dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		else
+			dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc);
+		break;
+	default:
+		/* Currently no other hardware supports loopback */
+		break;
+	}
+
+
+}
+
+/**
+ *  e1000_vmdq_set_replication_pf - enable or disable vmdq replication
+ *  @hw: pointer to the hardware struct
+ *  @enable: state to enter, either enabled or disabled
+ *
+ *  enables/disables replication of packets across multiple pools.
+ **/
+void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable)
+{
+	u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
+
+	if (enable)
+		vt_ctl |= E1000_VT_CTL_VM_REPL_EN;
+	else
+		vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN;
+
+	E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
+}
+
+/**
+ *  e1000_read_phy_reg_82580 - Read 82580 MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the MDI control register in the PHY at offset and stores the
+ *  information read to data.
+ **/
+static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_read_phy_reg_82580");
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_read_phy_reg_mdic(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_write_phy_reg_82580 - Write 82580 MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write to register at offset
+ *
+ *  Writes data to MDI control register in the PHY at offset.
+ **/
+static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_write_phy_reg_82580");
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = e1000_write_phy_reg_mdic(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on
+ *  the values found in the EEPROM.  This addresses an issue in which these
+ *  bits are not restored from EEPROM after reset.
+ **/
+static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u32 mdicnfg;
+	u16 nvm_data = 0;
+
+	DEBUGFUNC("e1000_reset_mdicnfg_82580");
+
+	if (hw->mac.type != e1000_82580)
+		goto out;
+	if (!e1000_sgmii_active_82575(hw))
+		goto out;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+				   NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+				   &nvm_data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		goto out;
+	}
+
+	mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG);
+	if (nvm_data & NVM_WORD24_EXT_MDIO)
+		mdicnfg |= E1000_MDICNFG_EXT_MDIO;
+	if (nvm_data & NVM_WORD24_COM_MDIO)
+		mdicnfg |= E1000_MDICNFG_COM_MDIO;
+	E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg);
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_reset_hw_82580 - Reset hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets function or entire device (all ports, etc.)
+ *  to a known state.
+ **/
+static s32 e1000_reset_hw_82580(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	/* BH SW mailbox bit in SW_FW_SYNC */
+	u16 swmbsw_mask = E1000_SW_SYNCH_MB;
+	u32 ctrl;
+	bool global_device_reset = hw->dev_spec._82575.global_device_reset;
+
+	DEBUGFUNC("e1000_reset_hw_82580");
+
+	hw->dev_spec._82575.global_device_reset = false;
+
+	/* 82580 does not reliably do global_device_reset due to hw errata */
+	if (hw->mac.type == e1000_82580)
+		global_device_reset = false;
+
+	/* Get current control state. */
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+	/*
+	 * Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = e1000_disable_pcie_master_generic(hw);
+	if (ret_val)
+		DEBUGOUT("PCI-E Master disable polling has failed.\n");
+
+	DEBUGOUT("Masking off all interrupts\n");
+	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+	E1000_WRITE_REG(hw, E1000_RCTL, 0);
+	E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP);
+	E1000_WRITE_FLUSH(hw);
+
+	msec_delay(10);
+
+	/* Determine whether or not a global dev reset is requested */
+	if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw,
+	    swmbsw_mask))
+			global_device_reset = false;
+
+	if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) &
+	    E1000_STAT_DEV_RST_SET))
+		ctrl |= E1000_CTRL_DEV_RST;
+	else
+		ctrl |= E1000_CTRL_RST;
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+	E1000_WRITE_FLUSH(hw);
+
+	/* Add delay to insure DEV_RST has time to complete */
+	if (global_device_reset)
+		msec_delay(5);
+
+	ret_val = e1000_get_auto_rd_done_generic(hw);
+	if (ret_val) {
+		/*
+		 * When auto config read does not complete, do not
+		 * return with an error. This can happen in situations
+		 * where there is no eeprom and prevents getting link.
+		 */
+		DEBUGOUT("Auto Read Done did not complete\n");
+	}
+
+	/* clear global device reset status bit */
+	E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET);
+
+	/* Clear any pending interrupt events. */
+	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
+	E1000_READ_REG(hw, E1000_ICR);
+
+	ret_val = e1000_reset_mdicnfg_82580(hw);
+	if (ret_val)
+		DEBUGOUT("Could not reset MDICNFG based on EEPROM\n");
+
+	/* Install any alternate MAC address into RAR0 */
+	ret_val = e1000_check_alt_mac_addr_generic(hw);
+
+	/* Release semaphore */
+	if (global_device_reset)
+		hw->mac.ops.release_swfw_sync(hw, swmbsw_mask);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size
+ *  @data: data received by reading RXPBS register
+ *
+ *  The 82580 uses a table based approach for packet buffer allocation sizes.
+ *  This function converts the retrieved value into the correct table value
+ *     0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7
+ *  0x0 36  72 144   1   2   4   8  16
+ *  0x8 35  70 140 rsv rsv rsv rsv rsv
+ */
+u16 e1000_rxpbs_adjust_82580(u32 data)
+{
+	u16 ret_val = 0;
+
+	if (data < E1000_82580_RXPBS_TABLE_SIZE)
+		ret_val = e1000_82580_rxpbs_table[data];
+
+	return ret_val;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_with_offset - Validate EEPROM
+ *  checksum
+ *  @hw: pointer to the HW structure
+ *  @offset: offset in words of the checksum protected region
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	DEBUGFUNC("e1000_validate_nvm_checksum_with_offset");
+
+	for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Read Error\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16) NVM_SUM) {
+		DEBUGOUT("NVM Checksum Invalid\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_update_nvm_checksum_with_offset - Update EEPROM
+ *  checksum
+ *  @hw: pointer to the HW structure
+ *  @offset: offset in words of the checksum protected region
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM.
+ **/
+s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+{
+	s32 ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	DEBUGFUNC("e1000_update_nvm_checksum_with_offset");
+
+	for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16) NVM_SUM - checksum;
+	ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1,
+				    &checksum);
+	if (ret_val)
+		DEBUGOUT("NVM Write Error while updating checksum.\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM section checksum by reading/adding each word of
+ *  the EEPROM and then verifies that the sum of the EEPROM is
+ *  equal to 0xBABA.
+ **/
+static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 eeprom_regions_count = 1;
+	u16 j, nvm_data;
+	u16 nvm_offset;
+
+	DEBUGFUNC("e1000_validate_nvm_checksum_82580");
+
+	ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		goto out;
+	}
+
+	if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) {
+		/* if chekcsums compatibility bit is set validate checksums
+		 * for all 4 ports. */
+		eeprom_regions_count = 4;
+	}
+
+	for (j = 0; j < eeprom_regions_count; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = e1000_validate_nvm_checksum_with_offset(hw,
+								  nvm_offset);
+		if (ret_val != E1000_SUCCESS)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_update_nvm_checksum_82580 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM section checksums for all 4 ports by reading/adding
+ *  each word of the EEPROM up to the checksum.  Then calculates the EEPROM
+ *  checksum and writes the value to the EEPROM.
+ **/
+static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 j, nvm_data;
+	u16 nvm_offset;
+
+	DEBUGFUNC("e1000_update_nvm_checksum_82580");
+
+	ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n");
+		goto out;
+	}
+
+	if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) {
+		/* set compatibility bit to validate checksums appropriately */
+		nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK;
+		ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1,
+					    &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n");
+			goto out;
+		}
+	}
+
+	for (j = 0; j < 4; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM section checksum by reading/adding each word of
+ *  the EEPROM and then verifies that the sum of the EEPROM is
+ *  equal to 0xBABA.
+ **/
+static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 j;
+	u16 nvm_offset;
+
+	DEBUGFUNC("e1000_validate_nvm_checksum_i350");
+
+	for (j = 0; j < 4; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = e1000_validate_nvm_checksum_with_offset(hw,
+								  nvm_offset);
+		if (ret_val != E1000_SUCCESS)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_update_nvm_checksum_i350 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM section checksums for all 4 ports by reading/adding
+ *  each word of the EEPROM up to the checksum.  Then calculates the EEPROM
+ *  checksum and writes the value to the EEPROM.
+ **/
+static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 j;
+	u16 nvm_offset;
+
+	DEBUGFUNC("e1000_update_nvm_checksum_i350");
+
+	for (j = 0; j < 4; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset);
+		if (ret_val != E1000_SUCCESS)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  __e1000_access_emi_reg - Read/write EMI register
+ *  @hw: pointer to the HW structure
+ *  @addr: EMI address to program
+ *  @data: pointer to value to read/write from/to the EMI address
+ *  @read: boolean flag to indicate read or write
+ **/
+static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address,
+				  u16 *data, bool read)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("__e1000_access_emi_reg");
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data);
+	else
+		ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_emi_reg - Read Extended Management Interface register
+ *  @hw: pointer to the HW structure
+ *  @addr: EMI address to program
+ *  @data: value to be read from the EMI address
+ **/
+s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data)
+{
+	DEBUGFUNC("e1000_read_emi_reg");
+
+	return __e1000_access_emi_reg(hw, addr, data, true);
+}
+
+/**
+ *  e1000_set_eee_i350 - Enable/disable EEE support
+ *  @hw: pointer to the HW structure
+ *
+ *  Enable/disable EEE based on setting in dev_spec structure.
+ *
+ **/
+s32 e1000_set_eee_i350(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u32 ipcnfg, eeer;
+
+	DEBUGFUNC("e1000_set_eee_i350");
+
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
+		goto out;
+	ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG);
+	eeer = E1000_READ_REG(hw, E1000_EEER);
+
+	/* enable or disable per user setting */
+	if (!(hw->dev_spec._82575.eee_disable)) {
+		u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU);
+
+		ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
+		eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
+			 E1000_EEER_LPI_FC);
+
+		/* This bit should not be set in normal operation. */
+		if (eee_su & E1000_EEE_SU_LPI_CLK_STP)
+			DEBUGOUT("LPI Clock Stop Bit should not be set!\n");
+	} else {
+		ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN);
+		eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
+			  E1000_EEER_LPI_FC);
+	}
+	E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg);
+	E1000_WRITE_REG(hw, E1000_EEER, eeer);
+	E1000_READ_REG(hw, E1000_IPCNFG);
+	E1000_READ_REG(hw, E1000_EEER);
+out:
+
+	return ret_val;
+}
+
+/**
+ *  e1000_set_eee_i354 - Enable/disable EEE support
+ *  @hw: pointer to the HW structure
+ *
+ *  Enable/disable EEE legacy mode based on setting in dev_spec structure.
+ *
+ **/
+s32 e1000_set_eee_i354(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u16 phy_data;
+
+	DEBUGFUNC("e1000_set_eee_i354");
+
+	if ((hw->phy.media_type != e1000_media_type_copper) ||
+	    ((phy->id != M88E1543_E_PHY_ID)))
+		goto out;
+
+	if (!hw->dev_spec._82575.eee_disable) {
+		/* Switch to PHY page 18. */
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1,
+					    &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data |= E1000_M88E1543_EEE_CTRL_1_MS;
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1,
+					     phy_data);
+		if (ret_val)
+			goto out;
+
+		/* Return the PHY to page 0. */
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+		if (ret_val)
+			goto out;
+
+		/* Turn on EEE advertisement. */
+		ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+					       E1000_EEE_ADV_DEV_I354,
+					       &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data |= E1000_EEE_ADV_100_SUPPORTED |
+			    E1000_EEE_ADV_1000_SUPPORTED;
+		ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+						E1000_EEE_ADV_DEV_I354,
+						phy_data);
+	} else {
+		/* Turn off EEE advertisement. */
+		ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+					       E1000_EEE_ADV_DEV_I354,
+					       &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED |
+			      E1000_EEE_ADV_1000_SUPPORTED);
+		ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+						E1000_EEE_ADV_DEV_I354,
+						phy_data);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_get_eee_status_i354 - Get EEE status
+ *  @hw: pointer to the HW structure
+ *  @status: EEE status
+ *
+ *  Get EEE status by guessing based on whether Tx or Rx LPI indications have
+ *  been received.
+ **/
+s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u16 phy_data;
+
+	DEBUGFUNC("e1000_get_eee_status_i354");
+
+	/* Check if EEE is supported on this device. */
+	if ((hw->phy.media_type != e1000_media_type_copper) ||
+	    ((phy->id != M88E1543_E_PHY_ID)))
+		goto out;
+
+	ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354,
+				       E1000_PCS_STATUS_DEV_I354,
+				       &phy_data);
+	if (ret_val)
+		goto out;
+
+	*status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD |
+			      E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false;
+
+out:
+	return ret_val;
+}
+
+/* Due to a hw errata, if the host tries to  configure the VFTA register
+ * while performing queries from the BMC or DMA, then the VFTA in some
+ * cases won't be written.
+ */
+
+/**
+ *  e1000_clear_vfta_i350 - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the register array which contains the VLAN filter table by
+ *  setting all the values to 0.
+ **/
+void e1000_clear_vfta_i350(struct e1000_hw *hw)
+{
+	u32 offset;
+	int i;
+
+	DEBUGFUNC("e1000_clear_vfta_350");
+
+	for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+		for (i = 0; i < 10; i++)
+			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
+
+		E1000_WRITE_FLUSH(hw);
+	}
+}
+
+/**
+ *  e1000_write_vfta_i350 - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset in VLAN filter table
+ *  @value: register value written to VLAN filter table
+ *
+ *  Writes value at the given offset in the register array which stores
+ *  the VLAN filter table.
+ **/
+void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	int i;
+
+	DEBUGFUNC("e1000_write_vfta_350");
+
+	for (i = 0; i < 10; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
+
+	E1000_WRITE_FLUSH(hw);
+}
+
+
+/**
+ *  e1000_set_i2c_bb - Enable I2C bit-bang
+ *  @hw: pointer to the HW structure
+ *
+ *  Enable I2C bit-bang interface
+ *
+ **/
+s32 e1000_set_i2c_bb(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u32 ctrl_ext, i2cparams;
+
+	DEBUGFUNC("e1000_set_i2c_bb");
+
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	ctrl_ext |= E1000_CTRL_I2C_ENA;
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+	E1000_WRITE_FLUSH(hw);
+
+	i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS);
+	i2cparams |= E1000_I2CBB_EN;
+	i2cparams |= E1000_I2C_DATA_OE_N;
+	i2cparams |= E1000_I2C_CLK_OE_N;
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams);
+	E1000_WRITE_FLUSH(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_i2c_byte_generic - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: device address
+ *  @data: value read
+ *
+ *  Performs byte read operation over I2C interface at
+ *  a specified device address.
+ **/
+s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data)
+{
+	s32 status = E1000_SUCCESS;
+	u32 max_retry = 10;
+	u32 retry = 1;
+	u16 swfw_mask = 0;
+
+	bool nack = true;
+
+	DEBUGFUNC("e1000_read_i2c_byte_generic");
+
+	swfw_mask = E1000_SWFW_PHY0_SM;
+
+	do {
+		if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
+		    != E1000_SUCCESS) {
+			status = E1000_ERR_SWFW_SYNC;
+			goto read_byte_out;
+		}
+
+		e1000_i2c_start(hw);
+
+		/* Device Address and write indication */
+		status = e1000_clock_out_i2c_byte(hw, dev_addr);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_get_i2c_ack(hw);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_clock_out_i2c_byte(hw, byte_offset);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_get_i2c_ack(hw);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		e1000_i2c_start(hw);
+
+		/* Device Address and read indication */
+		status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1));
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_get_i2c_ack(hw);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_clock_in_i2c_byte(hw, data);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_clock_out_i2c_bit(hw, nack);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		e1000_i2c_stop(hw);
+		break;
+
+fail:
+		hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		msec_delay(100);
+		e1000_i2c_bus_clear(hw);
+		retry++;
+		if (retry < max_retry)
+			DEBUGOUT("I2C byte read error - Retrying.\n");
+		else
+			DEBUGOUT("I2C byte read error.\n");
+
+	} while (retry < max_retry);
+
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+read_byte_out:
+
+	return status;
+}
+
+/**
+ *  e1000_write_i2c_byte_generic - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: value to write
+ *
+ *  Performs byte write operation over I2C interface at
+ *  a specified device address.
+ **/
+s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data)
+{
+	s32 status = E1000_SUCCESS;
+	u32 max_retry = 1;
+	u32 retry = 0;
+	u16 swfw_mask = 0;
+
+	DEBUGFUNC("e1000_write_i2c_byte_generic");
+
+	swfw_mask = E1000_SWFW_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) {
+		status = E1000_ERR_SWFW_SYNC;
+		goto write_byte_out;
+	}
+
+	do {
+		e1000_i2c_start(hw);
+
+		status = e1000_clock_out_i2c_byte(hw, dev_addr);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_get_i2c_ack(hw);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_clock_out_i2c_byte(hw, byte_offset);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_get_i2c_ack(hw);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_clock_out_i2c_byte(hw, data);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		status = e1000_get_i2c_ack(hw);
+		if (status != E1000_SUCCESS)
+			goto fail;
+
+		e1000_i2c_stop(hw);
+		break;
+
+fail:
+		e1000_i2c_bus_clear(hw);
+		retry++;
+		if (retry < max_retry)
+			DEBUGOUT("I2C byte write error - Retrying.\n");
+		else
+			DEBUGOUT("I2C byte write error.\n");
+	} while (retry < max_retry);
+
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+write_byte_out:
+
+	return status;
+}
+
+/**
+ *  e1000_i2c_start - Sets I2C start condition
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets I2C start condition (High -> Low on SDA while SCL is High)
+ **/
+static void e1000_i2c_start(struct e1000_hw *hw)
+{
+	u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	DEBUGFUNC("e1000_i2c_start");
+
+	/* Start condition must begin with data and clock high */
+	e1000_set_i2c_data(hw, &i2cctl, 1);
+	e1000_raise_i2c_clk(hw, &i2cctl);
+
+	/* Setup time for start condition (4.7us) */
+	usec_delay(E1000_I2C_T_SU_STA);
+
+	e1000_set_i2c_data(hw, &i2cctl, 0);
+
+	/* Hold time for start condition (4us) */
+	usec_delay(E1000_I2C_T_HD_STA);
+
+	e1000_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	usec_delay(E1000_I2C_T_LOW);
+
+}
+
+/**
+ *  e1000_i2c_stop - Sets I2C stop condition
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets I2C stop condition (Low -> High on SDA while SCL is High)
+ **/
+static void e1000_i2c_stop(struct e1000_hw *hw)
+{
+	u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	DEBUGFUNC("e1000_i2c_stop");
+
+	/* Stop condition must begin with data low and clock high */
+	e1000_set_i2c_data(hw, &i2cctl, 0);
+	e1000_raise_i2c_clk(hw, &i2cctl);
+
+	/* Setup time for stop condition (4us) */
+	usec_delay(E1000_I2C_T_SU_STO);
+
+	e1000_set_i2c_data(hw, &i2cctl, 1);
+
+	/* bus free time between stop and start (4.7us)*/
+	usec_delay(E1000_I2C_T_BUF);
+}
+
+/**
+ *  e1000_clock_in_i2c_byte - Clocks in one byte via I2C
+ *  @hw: pointer to hardware structure
+ *  @data: data byte to clock in
+ *
+ *  Clocks in one byte data via I2C data/clock
+ **/
+static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data)
+{
+	s32 i;
+	bool bit = 0;
+
+	DEBUGFUNC("e1000_clock_in_i2c_byte");
+
+	*data = 0;
+	for (i = 7; i >= 0; i--) {
+		e1000_clock_in_i2c_bit(hw, &bit);
+		*data |= bit << i;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_clock_out_i2c_byte - Clocks out one byte via I2C
+ *  @hw: pointer to hardware structure
+ *  @data: data byte clocked out
+ *
+ *  Clocks out one byte data via I2C data/clock
+ **/
+static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data)
+{
+	s32 status = E1000_SUCCESS;
+	s32 i;
+	u32 i2cctl;
+	bool bit = 0;
+
+	DEBUGFUNC("e1000_clock_out_i2c_byte");
+
+	for (i = 7; i >= 0; i--) {
+		bit = (data >> i) & 0x1;
+		status = e1000_clock_out_i2c_bit(hw, bit);
+
+		if (status != E1000_SUCCESS)
+			break;
+	}
+
+	/* Release SDA line (set high) */
+	i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	i2cctl |= E1000_I2C_DATA_OE_N;
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
+	E1000_WRITE_FLUSH(hw);
+
+	return status;
+}
+
+/**
+ *  e1000_get_i2c_ack - Polls for I2C ACK
+ *  @hw: pointer to hardware structure
+ *
+ *  Clocks in/out one bit via I2C data/clock
+ **/
+static s32 e1000_get_i2c_ack(struct e1000_hw *hw)
+{
+	s32 status = E1000_SUCCESS;
+	u32 i = 0;
+	u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+	u32 timeout = 10;
+	bool ack = true;
+
+	DEBUGFUNC("e1000_get_i2c_ack");
+
+	e1000_raise_i2c_clk(hw, &i2cctl);
+
+	/* Minimum high period of clock is 4us */
+	usec_delay(E1000_I2C_T_HIGH);
+
+	/* Wait until SCL returns high */
+	for (i = 0; i < timeout; i++) {
+		usec_delay(1);
+		i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+		if (i2cctl & E1000_I2C_CLK_IN)
+			break;
+	}
+	if (!(i2cctl & E1000_I2C_CLK_IN))
+		return E1000_ERR_I2C;
+
+	ack = e1000_get_i2c_data(&i2cctl);
+	if (ack) {
+		DEBUGOUT("I2C ack was not received.\n");
+		status = E1000_ERR_I2C;
+	}
+
+	e1000_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	usec_delay(E1000_I2C_T_LOW);
+
+	return status;
+}
+
+/**
+ *  e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
+ *  @hw: pointer to hardware structure
+ *  @data: read data value
+ *
+ *  Clocks in one bit via I2C data/clock
+ **/
+static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data)
+{
+	u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	DEBUGFUNC("e1000_clock_in_i2c_bit");
+
+	e1000_raise_i2c_clk(hw, &i2cctl);
+
+	/* Minimum high period of clock is 4us */
+	usec_delay(E1000_I2C_T_HIGH);
+
+	i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+	*data = e1000_get_i2c_data(&i2cctl);
+
+	e1000_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	usec_delay(E1000_I2C_T_LOW);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock
+ *  @hw: pointer to hardware structure
+ *  @data: data value to write
+ *
+ *  Clocks out one bit via I2C data/clock
+ **/
+static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data)
+{
+	s32 status;
+	u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	DEBUGFUNC("e1000_clock_out_i2c_bit");
+
+	status = e1000_set_i2c_data(hw, &i2cctl, data);
+	if (status == E1000_SUCCESS) {
+		e1000_raise_i2c_clk(hw, &i2cctl);
+
+		/* Minimum high period of clock is 4us */
+		usec_delay(E1000_I2C_T_HIGH);
+
+		e1000_lower_i2c_clk(hw, &i2cctl);
+
+		/* Minimum low period of clock is 4.7 us.
+		 * This also takes care of the data hold time.
+		 */
+		usec_delay(E1000_I2C_T_LOW);
+	} else {
+		status = E1000_ERR_I2C;
+		DEBUGOUT1("I2C data was not set to %X\n", data);
+	}
+
+	return status;
+}
+/**
+ *  e1000_raise_i2c_clk - Raises the I2C SCL clock
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Raises the I2C clock line '0'->'1'
+ **/
+static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl)
+{
+	DEBUGFUNC("e1000_raise_i2c_clk");
+
+	*i2cctl |= E1000_I2C_CLK_OUT;
+	*i2cctl &= ~E1000_I2C_CLK_OE_N;
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
+	E1000_WRITE_FLUSH(hw);
+
+	/* SCL rise time (1000ns) */
+	usec_delay(E1000_I2C_T_RISE);
+}
+
+/**
+ *  e1000_lower_i2c_clk - Lowers the I2C SCL clock
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Lowers the I2C clock line '1'->'0'
+ **/
+static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl)
+{
+
+	DEBUGFUNC("e1000_lower_i2c_clk");
+
+	*i2cctl &= ~E1000_I2C_CLK_OUT;
+	*i2cctl &= ~E1000_I2C_CLK_OE_N;
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
+	E1000_WRITE_FLUSH(hw);
+
+	/* SCL fall time (300ns) */
+	usec_delay(E1000_I2C_T_FALL);
+}
+
+/**
+ *  e1000_set_i2c_data - Sets the I2C data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *  @data: I2C data value (0 or 1) to set
+ *
+ *  Sets the I2C data bit
+ **/
+static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data)
+{
+	s32 status = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_set_i2c_data");
+
+	if (data)
+		*i2cctl |= E1000_I2C_DATA_OUT;
+	else
+		*i2cctl &= ~E1000_I2C_DATA_OUT;
+
+	*i2cctl &= ~E1000_I2C_DATA_OE_N;
+	*i2cctl |= E1000_I2C_CLK_OE_N;
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl);
+	E1000_WRITE_FLUSH(hw);
+
+	/* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
+	usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA);
+
+	*i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+	if (data != e1000_get_i2c_data(i2cctl)) {
+		status = E1000_ERR_I2C;
+		DEBUGOUT1("Error - I2C data was not set to %X.\n", data);
+	}
+
+	return status;
+}
+
+/**
+ *  e1000_get_i2c_data - Reads the I2C SDA data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Returns the I2C data bit value
+ **/
+static bool e1000_get_i2c_data(u32 *i2cctl)
+{
+	bool data;
+
+	DEBUGFUNC("e1000_get_i2c_data");
+
+	if (*i2cctl & E1000_I2C_DATA_IN)
+		data = 1;
+	else
+		data = 0;
+
+	return data;
+}
+
+/**
+ *  e1000_i2c_bus_clear - Clears the I2C bus
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the I2C bus by sending nine clock pulses.
+ *  Used when data line is stuck low.
+ **/
+void e1000_i2c_bus_clear(struct e1000_hw *hw)
+{
+	u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+	u32 i;
+
+	DEBUGFUNC("e1000_i2c_bus_clear");
+
+	e1000_i2c_start(hw);
+
+	e1000_set_i2c_data(hw, &i2cctl, 1);
+
+	for (i = 0; i < 9; i++) {
+		e1000_raise_i2c_clk(hw, &i2cctl);
+
+		/* Min high period of clock is 4us */
+		usec_delay(E1000_I2C_T_HIGH);
+
+		e1000_lower_i2c_clk(hw, &i2cctl);
+
+		/* Min low period of clock is 4.7us*/
+		usec_delay(E1000_I2C_T_LOW);
+	}
+
+	e1000_i2c_start(hw);
+
+	/* Put the i2c bus back to default state */
+	e1000_i2c_stop(hw);
+}
+
+static const u8 e1000_emc_temp_data[4] = {
+	E1000_EMC_INTERNAL_DATA,
+	E1000_EMC_DIODE1_DATA,
+	E1000_EMC_DIODE2_DATA,
+	E1000_EMC_DIODE3_DATA
+};
+static const u8 e1000_emc_therm_limit[4] = {
+	E1000_EMC_INTERNAL_THERM_LIMIT,
+	E1000_EMC_DIODE1_THERM_LIMIT,
+	E1000_EMC_DIODE2_THERM_LIMIT,
+	E1000_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ *  e1000_get_thermal_sensor_data_generic - Gathers thermal sensor data
+ *  @hw: pointer to hardware structure
+ *
+ *  Updates the temperatures in mac.thermal_sensor_data
+ **/
+s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw)
+{
+	s32 status = E1000_SUCCESS;
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  num_sensors;
+	u8  sensor_index;
+	u8  sensor_location;
+	u8  i;
+	struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	DEBUGFUNC("e1000_get_thermal_sensor_data_generic");
+
+	if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
+		return E1000_NOT_IMPLEMENTED;
+
+	data->sensor[0].temp = (E1000_READ_REG(hw, E1000_THMJT) & 0xFF);
+
+	/* Return the internal sensor only if ETS is unsupported */
+	e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset);
+	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+		return status;
+
+	e1000_read_nvm(hw, ets_offset, 1, &ets_cfg);
+	if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+	    != NVM_ETS_TYPE_EMC)
+		return E1000_NOT_IMPLEMENTED;
+
+	num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+	if (num_sensors > E1000_MAX_SENSORS)
+		num_sensors = E1000_MAX_SENSORS;
+
+	for (i = 1; i < num_sensors; i++) {
+		e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor);
+		sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+				NVM_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+				   NVM_ETS_DATA_LOC_SHIFT);
+
+		if (sensor_location != 0)
+			hw->phy.ops.read_i2c_byte(hw,
+					e1000_emc_temp_data[sensor_index],
+					E1000_I2C_THERMAL_SENSOR_ADDR,
+					&data->sensor[i].temp);
+	}
+	return status;
+}
+
+/**
+ *  e1000_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the thermal sensor thresholds according to the NVM map
+ *  and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
+{
+	s32 status = E1000_SUCCESS;
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  low_thresh_delta;
+	u8  num_sensors;
+	u8  sensor_index;
+	u8  sensor_location;
+	u8  therm_limit;
+	u8  i;
+	struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	DEBUGFUNC("e1000_init_thermal_sensor_thresh_generic");
+
+	if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
+		return E1000_NOT_IMPLEMENTED;
+
+	memset(data, 0, sizeof(struct e1000_thermal_sensor_data));
+
+	data->sensor[0].location = 0x1;
+	data->sensor[0].caution_thresh =
+		(E1000_READ_REG(hw, E1000_THHIGHTC) & 0xFF);
+	data->sensor[0].max_op_thresh =
+		(E1000_READ_REG(hw, E1000_THLOWTC) & 0xFF);
+
+	/* Return the internal sensor only if ETS is unsupported */
+	e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset);
+	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+		return status;
+
+	e1000_read_nvm(hw, ets_offset, 1, &ets_cfg);
+	if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+	    != NVM_ETS_TYPE_EMC)
+		return E1000_NOT_IMPLEMENTED;
+
+	low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >>
+			    NVM_ETS_LTHRES_DELTA_SHIFT);
+	num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+
+	for (i = 1; i <= num_sensors; i++) {
+		e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor);
+		sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+				NVM_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+				   NVM_ETS_DATA_LOC_SHIFT);
+		therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK;
+
+		hw->phy.ops.write_i2c_byte(hw,
+			e1000_emc_therm_limit[sensor_index],
+			E1000_I2C_THERMAL_SENSOR_ADDR,
+			therm_limit);
+
+		if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) {
+			data->sensor[i].location = sensor_location;
+			data->sensor[i].caution_thresh = therm_limit;
+			data->sensor[i].max_op_thresh = therm_limit -
+							low_thresh_delta;
+		}
+	}
+	return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
new file mode 100644
index 00000000..1aec75ab
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
@@ -0,0 +1,509 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_82575_H_
+#define _E1000_82575_H_
+
+#define ID_LED_DEFAULT_82575_SERDES	((ID_LED_DEF1_DEF2 << 12) | \
+					 (ID_LED_DEF1_DEF2 <<  8) | \
+					 (ID_LED_DEF1_DEF2 <<  4) | \
+					 (ID_LED_OFF1_ON2))
+/*
+ * Receive Address Register Count
+ * Number of high/low register pairs in the RAR.  The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * These entries are also used for MAC-based filtering.
+ */
+/*
+ * For 82576, there are an additional set of RARs that begin at an offset
+ * separate from the first set of RARs.
+ */
+#define E1000_RAR_ENTRIES_82575	16
+#define E1000_RAR_ENTRIES_82576	24
+#define E1000_RAR_ENTRIES_82580	24
+#define E1000_RAR_ENTRIES_I350	32
+#define E1000_SW_SYNCH_MB	0x00000100
+#define E1000_STAT_DEV_RST_SET	0x00100000
+#define E1000_CTRL_DEV_RST	0x20000000
+
+struct e1000_adv_data_desc {
+	__le64 buffer_addr;    /* Address of the descriptor's data buffer */
+	union {
+		u32 data;
+		struct {
+			u32 datalen:16; /* Data buffer length */
+			u32 rsvd:4;
+			u32 dtyp:4;  /* Descriptor type */
+			u32 dcmd:8;  /* Descriptor command */
+		} config;
+	} lower;
+	union {
+		u32 data;
+		struct {
+			u32 status:4;  /* Descriptor status */
+			u32 idx:4;
+			u32 popts:6;  /* Packet Options */
+			u32 paylen:18; /* Payload length */
+		} options;
+	} upper;
+};
+
+#define E1000_TXD_DTYP_ADV_C	0x2  /* Advanced Context Descriptor */
+#define E1000_TXD_DTYP_ADV_D	0x3  /* Advanced Data Descriptor */
+#define E1000_ADV_TXD_CMD_DEXT	0x20 /* Descriptor extension (0 = legacy) */
+#define E1000_ADV_TUCMD_IPV4	0x2  /* IP Packet Type: 1=IPv4 */
+#define E1000_ADV_TUCMD_IPV6	0x0  /* IP Packet Type: 0=IPv6 */
+#define E1000_ADV_TUCMD_L4T_UDP	0x0  /* L4 Packet TYPE of UDP */
+#define E1000_ADV_TUCMD_L4T_TCP	0x4  /* L4 Packet TYPE of TCP */
+#define E1000_ADV_TUCMD_MKRREQ	0x10 /* Indicates markers are required */
+#define E1000_ADV_DCMD_EOP	0x1  /* End of Packet */
+#define E1000_ADV_DCMD_IFCS	0x2  /* Insert FCS (Ethernet CRC) */
+#define E1000_ADV_DCMD_RS	0x8  /* Report Status */
+#define E1000_ADV_DCMD_VLE	0x40 /* Add VLAN tag */
+#define E1000_ADV_DCMD_TSE	0x80 /* TCP Seg enable */
+/* Extended Device Control */
+#define E1000_CTRL_EXT_NSICR	0x00000001 /* Disable Intr Clear all on read */
+
+struct e1000_adv_context_desc {
+	union {
+		u32 ip_config;
+		struct {
+			u32 iplen:9;
+			u32 maclen:7;
+			u32 vlan_tag:16;
+		} fields;
+	} ip_setup;
+	u32 seq_num;
+	union {
+		u64 l4_config;
+		struct {
+			u32 mkrloc:9;
+			u32 tucmd:11;
+			u32 dtyp:4;
+			u32 adv:8;
+			u32 rsvd:4;
+			u32 idx:4;
+			u32 l4len:8;
+			u32 mss:16;
+		} fields;
+	} l4_setup;
+};
+
+/* SRRCTL bit definitions */
+#define E1000_SRRCTL_BSIZEPKT_SHIFT		10 /* Shift _right_ */
+#define E1000_SRRCTL_BSIZEHDRSIZE_MASK		0x00000F00
+#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT		2  /* Shift _left_ */
+#define E1000_SRRCTL_DESCTYPE_LEGACY		0x00000000
+#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF	0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT		0x04000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS	0x0A000000
+#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION	0x06000000
+#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
+#define E1000_SRRCTL_DESCTYPE_MASK		0x0E000000
+#define E1000_SRRCTL_TIMESTAMP			0x40000000
+#define E1000_SRRCTL_DROP_EN			0x80000000
+
+#define E1000_SRRCTL_BSIZEPKT_MASK		0x0000007F
+#define E1000_SRRCTL_BSIZEHDR_MASK		0x00003F00
+
+#define E1000_TX_HEAD_WB_ENABLE		0x1
+#define E1000_TX_SEQNUM_WB_ENABLE	0x2
+
+#define E1000_MRQC_ENABLE_RSS_4Q		0x00000002
+#define E1000_MRQC_ENABLE_VMDQ			0x00000003
+#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q		0x00000005
+#define E1000_MRQC_RSS_FIELD_IPV4_UDP		0x00400000
+#define E1000_MRQC_RSS_FIELD_IPV6_UDP		0x00800000
+#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX	0x01000000
+#define E1000_MRQC_ENABLE_RSS_8Q		0x00000002
+
+#define E1000_VMRCTL_MIRROR_PORT_SHIFT		8
+#define E1000_VMRCTL_MIRROR_DSTPORT_MASK	(7 << \
+						 E1000_VMRCTL_MIRROR_PORT_SHIFT)
+#define E1000_VMRCTL_POOL_MIRROR_ENABLE		(1 << 0)
+#define E1000_VMRCTL_UPLINK_MIRROR_ENABLE	(1 << 1)
+#define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE	(1 << 2)
+
+#define E1000_EICR_TX_QUEUE ( \
+	E1000_EICR_TX_QUEUE0 |    \
+	E1000_EICR_TX_QUEUE1 |    \
+	E1000_EICR_TX_QUEUE2 |    \
+	E1000_EICR_TX_QUEUE3)
+
+#define E1000_EICR_RX_QUEUE ( \
+	E1000_EICR_RX_QUEUE0 |    \
+	E1000_EICR_RX_QUEUE1 |    \
+	E1000_EICR_RX_QUEUE2 |    \
+	E1000_EICR_RX_QUEUE3)
+
+#define E1000_EIMS_RX_QUEUE	E1000_EICR_RX_QUEUE
+#define E1000_EIMS_TX_QUEUE	E1000_EICR_TX_QUEUE
+
+#define EIMS_ENABLE_MASK ( \
+	E1000_EIMS_RX_QUEUE  | \
+	E1000_EIMS_TX_QUEUE  | \
+	E1000_EIMS_TCP_TIMER | \
+	E1000_EIMS_OTHER)
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define E1000_IMIR_PORT_IM_EN	0x00010000  /* TCP port enable */
+#define E1000_IMIR_PORT_BP	0x00020000  /* TCP port check bypass */
+#define E1000_IMIREXT_SIZE_BP	0x00001000  /* Packet size bypass */
+#define E1000_IMIREXT_CTRL_URG	0x00002000  /* Check URG bit in header */
+#define E1000_IMIREXT_CTRL_ACK	0x00004000  /* Check ACK bit in header */
+#define E1000_IMIREXT_CTRL_PSH	0x00008000  /* Check PSH bit in header */
+#define E1000_IMIREXT_CTRL_RST	0x00010000  /* Check RST bit in header */
+#define E1000_IMIREXT_CTRL_SYN	0x00020000  /* Check SYN bit in header */
+#define E1000_IMIREXT_CTRL_FIN	0x00040000  /* Check FIN bit in header */
+#define E1000_IMIREXT_CTRL_BP	0x00080000  /* Bypass check of ctrl bits */
+
+/* Receive Descriptor - Advanced */
+union e1000_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /*RSS type, Pkt type*/
+					/* Split Header, header buffer len */
+					__le16 hdr_info;
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+#define E1000_RXDADV_RSSTYPE_MASK	0x0000000F
+#define E1000_RXDADV_RSSTYPE_SHIFT	12
+#define E1000_RXDADV_HDRBUFLEN_MASK	0x7FE0
+#define E1000_RXDADV_HDRBUFLEN_SHIFT	5
+#define E1000_RXDADV_SPLITHEADER_EN	0x00001000
+#define E1000_RXDADV_SPH		0x8000
+#define E1000_RXDADV_STAT_TS		0x10000 /* Pkt was time stamped */
+#define E1000_RXDADV_STAT_TSIP		0x08000 /* timestamp in packet */
+#define E1000_RXDADV_ERR_HBO		0x00800000
+
+/* RSS Hash results */
+#define E1000_RXDADV_RSSTYPE_NONE	0x00000000
+#define E1000_RXDADV_RSSTYPE_IPV4_TCP	0x00000001
+#define E1000_RXDADV_RSSTYPE_IPV4	0x00000002
+#define E1000_RXDADV_RSSTYPE_IPV6_TCP	0x00000003
+#define E1000_RXDADV_RSSTYPE_IPV6_EX	0x00000004
+#define E1000_RXDADV_RSSTYPE_IPV6	0x00000005
+#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
+#define E1000_RXDADV_RSSTYPE_IPV4_UDP	0x00000007
+#define E1000_RXDADV_RSSTYPE_IPV6_UDP	0x00000008
+#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
+
+/* RSS Packet Types as indicated in the receive descriptor */
+#define E1000_RXDADV_PKTTYPE_NONE	0x00000000
+#define E1000_RXDADV_PKTTYPE_IPV4	0x00000010 /* IPV4 hdr present */
+#define E1000_RXDADV_PKTTYPE_IPV4_EX	0x00000020 /* IPV4 hdr + extensions */
+#define E1000_RXDADV_PKTTYPE_IPV6	0x00000040 /* IPV6 hdr present */
+#define E1000_RXDADV_PKTTYPE_IPV6_EX	0x00000080 /* IPV6 hdr + extensions */
+#define E1000_RXDADV_PKTTYPE_TCP	0x00000100 /* TCP hdr present */
+#define E1000_RXDADV_PKTTYPE_UDP	0x00000200 /* UDP hdr present */
+#define E1000_RXDADV_PKTTYPE_SCTP	0x00000400 /* SCTP hdr present */
+#define E1000_RXDADV_PKTTYPE_NFS	0x00000800 /* NFS hdr present */
+
+#define E1000_RXDADV_PKTTYPE_IPSEC_ESP	0x00001000 /* IPSec ESP */
+#define E1000_RXDADV_PKTTYPE_IPSEC_AH	0x00002000 /* IPSec AH */
+#define E1000_RXDADV_PKTTYPE_LINKSEC	0x00004000 /* LinkSec Encap */
+#define E1000_RXDADV_PKTTYPE_ETQF	0x00008000 /* PKTTYPE is ETQF index */
+#define E1000_RXDADV_PKTTYPE_ETQF_MASK	0x00000070 /* ETQF has 8 indices */
+#define E1000_RXDADV_PKTTYPE_ETQF_SHIFT	4 /* Right-shift 4 bits */
+
+/* LinkSec results */
+/* Security Processing bit Indication */
+#define E1000_RXDADV_LNKSEC_STATUS_SECP		0x00020000
+#define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK	0x18000000
+#define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH	0x08000000
+#define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR	0x10000000
+#define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG	0x18000000
+
+#define E1000_RXDADV_IPSEC_STATUS_SECP			0x00020000
+#define E1000_RXDADV_IPSEC_ERROR_BIT_MASK		0x18000000
+#define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL	0x08000000
+#define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH		0x10000000
+#define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED	0x18000000
+
+/* Transmit Descriptor - Advanced */
+union e1000_adv_tx_desc {
+	struct {
+		__le64 buffer_addr;    /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd;       /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_DTYP_CTXT	0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA	0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_EOP	0x01000000 /* End of Packet */
+#define E1000_ADVTXD_DCMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_RS	0x08000000 /* Report Status */
+#define E1000_ADVTXD_DCMD_DDTYP_ISCSI	0x10000000 /* DDP hdr type or iSCSI */
+#define E1000_ADVTXD_DCMD_DEXT	0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_DCMD_VLE	0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
+#define E1000_ADVTXD_MAC_LINKSEC	0x00040000 /* Apply LinkSec on pkt */
+#define E1000_ADVTXD_MAC_TSTAMP		0x00080000 /* IEEE1588 Timestamp pkt */
+#define E1000_ADVTXD_STAT_SN_CRC	0x00000002 /* NXTSEQ/SEED prsnt in WB */
+#define E1000_ADVTXD_IDX_SHIFT		4  /* Adv desc Index shift */
+#define E1000_ADVTXD_POPTS_ISCO_1ST	0x00000000 /* 1st TSO of iSCSI PDU */
+#define E1000_ADVTXD_POPTS_ISCO_MDL	0x00000800 /* Middle TSO of iSCSI PDU */
+#define E1000_ADVTXD_POPTS_ISCO_LAST	0x00001000 /* Last TSO of iSCSI PDU */
+/* 1st & Last TSO-full iSCSI PDU*/
+#define E1000_ADVTXD_POPTS_ISCO_FULL	0x00001800
+#define E1000_ADVTXD_POPTS_IPSEC	0x00000400 /* IPSec offload request */
+#define E1000_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
+
+/* Context descriptors */
+struct e1000_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 seqnum_seed;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+#define E1000_ADVTXD_MACLEN_SHIFT	9  /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_VLAN_SHIFT		16  /* Adv ctxt vlan tag shift */
+#define E1000_ADVTXD_TUCMD_IPV4		0x00000400  /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_IPV6		0x00000000  /* IP Packet Type: 0=IPv6 */
+#define E1000_ADVTXD_TUCMD_L4T_UDP	0x00000000  /* L4 Packet TYPE of UDP */
+#define E1000_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet TYPE of TCP */
+#define E1000_ADVTXD_TUCMD_L4T_SCTP	0x00001000  /* L4 Packet TYPE of SCTP */
+#define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP	0x00002000 /* IPSec Type ESP */
+/* IPSec Encrypt Enable for ESP */
+#define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN	0x00004000
+/* Req requires Markers and CRC */
+#define E1000_ADVTXD_TUCMD_MKRREQ	0x00002000
+#define E1000_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT		16  /* Adv ctxt MSS shift */
+/* Adv ctxt IPSec SA IDX mask */
+#define E1000_ADVTXD_IPSEC_SA_INDEX_MASK	0x000000FF
+/* Adv ctxt IPSec ESP len mask */
+#define E1000_ADVTXD_IPSEC_ESP_LEN_MASK		0x000000FF
+
+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Tx Queue */
+#define E1000_TXDCTL_SWFLSH		0x04000000 /* Tx Desc. wbk flushing */
+/* Tx Queue Arbitration Priority 0=low, 1=high */
+#define E1000_TXDCTL_PRIORITY		0x08000000
+
+/* Additional Receive Descriptor Control definitions */
+#define E1000_RXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Rx Queue */
+#define E1000_RXDCTL_SWFLSH		0x04000000 /* Rx Desc. wbk flushing */
+
+/* Direct Cache Access (DCA) definitions */
+#define E1000_DCA_CTRL_DCA_ENABLE	0x00000000 /* DCA Enable */
+#define E1000_DCA_CTRL_DCA_DISABLE	0x00000001 /* DCA Disable */
+
+#define E1000_DCA_CTRL_DCA_MODE_CB1	0x00 /* DCA Mode CB1 */
+#define E1000_DCA_CTRL_DCA_MODE_CB2	0x02 /* DCA Mode CB2 */
+
+#define E1000_DCA_RXCTRL_CPUID_MASK	0x0000001F /* Rx CPUID Mask */
+#define E1000_DCA_RXCTRL_DESC_DCA_EN	(1 << 5) /* DCA Rx Desc enable */
+#define E1000_DCA_RXCTRL_HEAD_DCA_EN	(1 << 6) /* DCA Rx Desc header ena */
+#define E1000_DCA_RXCTRL_DATA_DCA_EN	(1 << 7) /* DCA Rx Desc payload ena */
+#define E1000_DCA_RXCTRL_DESC_RRO_EN	(1 << 9) /* DCA Rx Desc Relax Order */
+
+#define E1000_DCA_TXCTRL_CPUID_MASK	0x0000001F /* Tx CPUID Mask */
+#define E1000_DCA_TXCTRL_DESC_DCA_EN	(1 << 5) /* DCA Tx Desc enable */
+#define E1000_DCA_TXCTRL_DESC_RRO_EN	(1 << 9) /* Tx rd Desc Relax Order */
+#define E1000_DCA_TXCTRL_TX_WB_RO_EN	(1 << 11) /* Tx Desc writeback RO bit */
+#define E1000_DCA_TXCTRL_DATA_RRO_EN	(1 << 13) /* Tx rd data Relax Order */
+
+#define E1000_DCA_TXCTRL_CPUID_MASK_82576	0xFF000000 /* Tx CPUID Mask */
+#define E1000_DCA_RXCTRL_CPUID_MASK_82576	0xFF000000 /* Rx CPUID Mask */
+#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576	24 /* Tx CPUID */
+#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576	24 /* Rx CPUID */
+
+/* Additional interrupt register bit definitions */
+#define E1000_ICR_LSECPNS	0x00000020 /* PN threshold - server */
+#define E1000_IMS_LSECPNS	E1000_ICR_LSECPNS /* PN threshold - server */
+#define E1000_ICS_LSECPNS	E1000_ICR_LSECPNS /* PN threshold - server */
+
+/* ETQF register bit definitions */
+#define E1000_ETQF_FILTER_ENABLE	(1 << 26)
+#define E1000_ETQF_IMM_INT		(1 << 29)
+#define E1000_ETQF_1588			(1 << 30)
+#define E1000_ETQF_QUEUE_ENABLE		(1 << 31)
+/*
+ * ETQF filter list: one static filter per filter consumer. This is
+ *                   to avoid filter collisions later. Add new filters
+ *                   here!!
+ *
+ * Current filters:
+ *    EAPOL 802.1x (0x888e): Filter 0
+ */
+#define E1000_ETQF_FILTER_EAPOL		0
+
+#define E1000_FTQF_VF_BP		0x00008000
+#define E1000_FTQF_1588_TIME_STAMP	0x08000000
+#define E1000_FTQF_MASK			0xF0000000
+#define E1000_FTQF_MASK_PROTO_BP	0x10000000
+#define E1000_FTQF_MASK_SOURCE_ADDR_BP	0x20000000
+#define E1000_FTQF_MASK_DEST_ADDR_BP	0x40000000
+#define E1000_FTQF_MASK_SOURCE_PORT_BP	0x80000000
+
+#define E1000_NVM_APME_82575		0x0400
+#define MAX_NUM_VFS			7
+
+#define E1000_DTXSWC_MAC_SPOOF_MASK	0x000000FF /* Per VF MAC spoof cntrl */
+#define E1000_DTXSWC_VLAN_SPOOF_MASK	0x0000FF00 /* Per VF VLAN spoof cntrl */
+#define E1000_DTXSWC_LLE_MASK		0x00FF0000 /* Per VF Local LB enables */
+#define E1000_DTXSWC_VLAN_SPOOF_SHIFT	8
+#define E1000_DTXSWC_LLE_SHIFT		16
+#define E1000_DTXSWC_VMDQ_LOOPBACK_EN	(1 << 31)  /* global VF LB enable */
+
+/* Easy defines for setting default pool, would normally be left a zero */
+#define E1000_VT_CTL_DEFAULT_POOL_SHIFT	7
+#define E1000_VT_CTL_DEFAULT_POOL_MASK	(0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT)
+
+/* Other useful VMD_CTL register defines */
+#define E1000_VT_CTL_IGNORE_MAC		(1 << 28)
+#define E1000_VT_CTL_DISABLE_DEF_POOL	(1 << 29)
+#define E1000_VT_CTL_VM_REPL_EN		(1 << 30)
+
+/* Per VM Offload register setup */
+#define E1000_VMOLR_RLPML_MASK	0x00003FFF /* Long Packet Maximum Length mask */
+#define E1000_VMOLR_LPE		0x00010000 /* Accept Long packet */
+#define E1000_VMOLR_RSSE	0x00020000 /* Enable RSS */
+#define E1000_VMOLR_AUPE	0x01000000 /* Accept untagged packets */
+#define E1000_VMOLR_ROMPE	0x02000000 /* Accept overflow multicast */
+#define E1000_VMOLR_ROPE	0x04000000 /* Accept overflow unicast */
+#define E1000_VMOLR_BAM		0x08000000 /* Accept Broadcast packets */
+#define E1000_VMOLR_MPME	0x10000000 /* Multicast promiscuous mode */
+#define E1000_VMOLR_STRVLAN	0x40000000 /* Vlan stripping enable */
+#define E1000_VMOLR_STRCRC	0x80000000 /* CRC stripping enable */
+
+#define E1000_VMOLR_VPE		0x00800000 /* VLAN promiscuous enable */
+#define E1000_VMOLR_UPE		0x20000000 /* Unicast promisuous enable */
+#define E1000_DVMOLR_HIDVLAN	0x20000000 /* Vlan hiding enable */
+#define E1000_DVMOLR_STRVLAN	0x40000000 /* Vlan stripping enable */
+#define E1000_DVMOLR_STRCRC	0x80000000 /* CRC stripping enable */
+
+#define E1000_PBRWAC_WALPB	0x00000007 /* Wrap around event on LAN Rx PB */
+#define E1000_PBRWAC_PBE	0x00000008 /* Rx packet buffer empty */
+
+#define E1000_VLVF_ARRAY_SIZE		32
+#define E1000_VLVF_VLANID_MASK		0x00000FFF
+#define E1000_VLVF_POOLSEL_SHIFT	12
+#define E1000_VLVF_POOLSEL_MASK		(0xFF << E1000_VLVF_POOLSEL_SHIFT)
+#define E1000_VLVF_LVLAN		0x00100000
+#define E1000_VLVF_VLANID_ENABLE	0x80000000
+
+#define E1000_VMVIR_VLANA_DEFAULT	0x40000000 /* Always use default VLAN */
+#define E1000_VMVIR_VLANA_NEVER		0x80000000 /* Never insert VLAN tag */
+
+#define E1000_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
+
+#define E1000_IOVCTL		0x05BBC
+#define E1000_IOVCTL_REUSE_VFQ	0x00000001
+
+#define E1000_RPLOLR_STRVLAN	0x40000000
+#define E1000_RPLOLR_STRCRC	0x80000000
+
+#define E1000_TCTL_EXT_COLD	0x000FFC00
+#define E1000_TCTL_EXT_COLD_SHIFT	10
+
+#define E1000_DTXCTL_8023LL	0x0004
+#define E1000_DTXCTL_VLAN_ADDED	0x0008
+#define E1000_DTXCTL_OOS_ENABLE	0x0010
+#define E1000_DTXCTL_MDP_EN	0x0020
+#define E1000_DTXCTL_SPOOF_INT	0x0040
+
+#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT	(1 << 14)
+
+#define ALL_QUEUES		0xFFFF
+
+/* Rx packet buffer size defines */
+#define E1000_RXPBS_SIZE_MASK_82576	0x0000007F
+void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable);
+void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf);
+void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable);
+s32 e1000_init_nvm_params_82575(struct e1000_hw *hw);
+
+u16 e1000_rxpbs_adjust_82580(u32 data);
+s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data);
+s32 e1000_set_eee_i350(struct e1000_hw *);
+s32 e1000_set_eee_i354(struct e1000_hw *);
+s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *);
+#define E1000_I2C_THERMAL_SENSOR_ADDR	0xF8
+#define E1000_EMC_INTERNAL_DATA		0x00
+#define E1000_EMC_INTERNAL_THERM_LIMIT	0x20
+#define E1000_EMC_DIODE1_DATA		0x01
+#define E1000_EMC_DIODE1_THERM_LIMIT	0x19
+#define E1000_EMC_DIODE2_DATA		0x23
+#define E1000_EMC_DIODE2_THERM_LIMIT	0x1A
+#define E1000_EMC_DIODE3_DATA		0x2A
+#define E1000_EMC_DIODE3_THERM_LIMIT	0x30
+
+s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw);
+s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw);
+
+/* I2C SDA and SCL timing parameters for standard mode */
+#define E1000_I2C_T_HD_STA	4
+#define E1000_I2C_T_LOW		5
+#define E1000_I2C_T_HIGH	4
+#define E1000_I2C_T_SU_STA	5
+#define E1000_I2C_T_HD_DATA	5
+#define E1000_I2C_T_SU_DATA	1
+#define E1000_I2C_T_RISE	1
+#define E1000_I2C_T_FALL	1
+#define E1000_I2C_T_SU_STO	4
+#define E1000_I2C_T_BUF		5
+
+s32 e1000_set_i2c_bb(struct e1000_hw *hw);
+s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data);
+s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data);
+void e1000_i2c_bus_clear(struct e1000_hw *hw);
+#endif /* _E1000_82575_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
new file mode 100644
index 00000000..6095d3b4
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
@@ -0,0 +1,1159 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+/**
+ *  e1000_init_mac_params - Initialize MAC function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  This function initializes the function pointers for the MAC
+ *  set of functions.  Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_mac_params(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	if (hw->mac.ops.init_params) {
+		ret_val = hw->mac.ops.init_params(hw);
+		if (ret_val) {
+			DEBUGOUT("MAC Initialization Error\n");
+			goto out;
+		}
+	} else {
+		DEBUGOUT("mac.init_mac_params was NULL\n");
+		ret_val = -E1000_ERR_CONFIG;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_init_nvm_params - Initialize NVM function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  This function initializes the function pointers for the NVM
+ *  set of functions.  Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_nvm_params(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	if (hw->nvm.ops.init_params) {
+		ret_val = hw->nvm.ops.init_params(hw);
+		if (ret_val) {
+			DEBUGOUT("NVM Initialization Error\n");
+			goto out;
+		}
+	} else {
+		DEBUGOUT("nvm.init_nvm_params was NULL\n");
+		ret_val = -E1000_ERR_CONFIG;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_init_phy_params - Initialize PHY function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  This function initializes the function pointers for the PHY
+ *  set of functions.  Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_phy_params(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	if (hw->phy.ops.init_params) {
+		ret_val = hw->phy.ops.init_params(hw);
+		if (ret_val) {
+			DEBUGOUT("PHY Initialization Error\n");
+			goto out;
+		}
+	} else {
+		DEBUGOUT("phy.init_phy_params was NULL\n");
+		ret_val =  -E1000_ERR_CONFIG;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_init_mbx_params - Initialize mailbox function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  This function initializes the function pointers for the PHY
+ *  set of functions.  Called by drivers or by e1000_setup_init_funcs.
+ **/
+s32 e1000_init_mbx_params(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	if (hw->mbx.ops.init_params) {
+		ret_val = hw->mbx.ops.init_params(hw);
+		if (ret_val) {
+			DEBUGOUT("Mailbox Initialization Error\n");
+			goto out;
+		}
+	} else {
+		DEBUGOUT("mbx.init_mbx_params was NULL\n");
+		ret_val =  -E1000_ERR_CONFIG;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_set_mac_type - Sets MAC type
+ *  @hw: pointer to the HW structure
+ *
+ *  This function sets the mac type of the adapter based on the
+ *  device ID stored in the hw structure.
+ *  MUST BE FIRST FUNCTION CALLED (explicitly or through
+ *  e1000_setup_init_funcs()).
+ **/
+s32 e1000_set_mac_type(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_set_mac_type");
+
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82575EB_COPPER:
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		mac->type = e1000_82575;
+		break;
+	case E1000_DEV_ID_82576:
+	case E1000_DEV_ID_82576_FIBER:
+	case E1000_DEV_ID_82576_SERDES:
+	case E1000_DEV_ID_82576_QUAD_COPPER:
+	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+	case E1000_DEV_ID_82576_NS:
+	case E1000_DEV_ID_82576_NS_SERDES:
+	case E1000_DEV_ID_82576_SERDES_QUAD:
+		mac->type = e1000_82576;
+		break;
+	case E1000_DEV_ID_82580_COPPER:
+	case E1000_DEV_ID_82580_FIBER:
+	case E1000_DEV_ID_82580_SERDES:
+	case E1000_DEV_ID_82580_SGMII:
+	case E1000_DEV_ID_82580_COPPER_DUAL:
+	case E1000_DEV_ID_82580_QUAD_FIBER:
+	case E1000_DEV_ID_DH89XXCC_SGMII:
+	case E1000_DEV_ID_DH89XXCC_SERDES:
+	case E1000_DEV_ID_DH89XXCC_BACKPLANE:
+	case E1000_DEV_ID_DH89XXCC_SFP:
+		mac->type = e1000_82580;
+		break;
+	case E1000_DEV_ID_I350_COPPER:
+	case E1000_DEV_ID_I350_FIBER:
+	case E1000_DEV_ID_I350_SERDES:
+	case E1000_DEV_ID_I350_SGMII:
+	case E1000_DEV_ID_I350_DA4:
+		mac->type = e1000_i350;
+		break;
+	case E1000_DEV_ID_I210_COPPER_FLASHLESS:
+	case E1000_DEV_ID_I210_SERDES_FLASHLESS:
+	case E1000_DEV_ID_I210_COPPER:
+	case E1000_DEV_ID_I210_COPPER_OEM1:
+	case E1000_DEV_ID_I210_COPPER_IT:
+	case E1000_DEV_ID_I210_FIBER:
+	case E1000_DEV_ID_I210_SERDES:
+	case E1000_DEV_ID_I210_SGMII:
+		mac->type = e1000_i210;
+		break;
+	case E1000_DEV_ID_I211_COPPER:
+		mac->type = e1000_i211;
+		break;
+
+	case E1000_DEV_ID_I354_BACKPLANE_1GBPS:
+	case E1000_DEV_ID_I354_SGMII:
+	case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS:
+		mac->type = e1000_i354;
+		break;
+	default:
+		/* Should never have loaded on this device */
+		ret_val = -E1000_ERR_MAC_INIT;
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_setup_init_funcs - Initializes function pointers
+ *  @hw: pointer to the HW structure
+ *  @init_device: true will initialize the rest of the function pointers
+ *		  getting the device ready for use.  false will only set
+ *		  MAC type and the function pointers for the other init
+ *		  functions.  Passing false will not generate any hardware
+ *		  reads or writes.
+ *
+ *  This function must be called by a driver in order to use the rest
+ *  of the 'shared' code files. Called by drivers only.
+ **/
+s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device)
+{
+	s32 ret_val;
+
+	/* Can't do much good without knowing the MAC type. */
+	ret_val = e1000_set_mac_type(hw);
+	if (ret_val) {
+		DEBUGOUT("ERROR: MAC type could not be set properly.\n");
+		goto out;
+	}
+
+	if (!hw->hw_addr) {
+		DEBUGOUT("ERROR: Registers not mapped\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	/*
+	 * Init function pointers to generic implementations. We do this first
+	 * allowing a driver module to override it afterward.
+	 */
+	e1000_init_mac_ops_generic(hw);
+	e1000_init_phy_ops_generic(hw);
+	e1000_init_nvm_ops_generic(hw);
+	e1000_init_mbx_ops_generic(hw);
+
+	/*
+	 * Set up the init function pointers. These are functions within the
+	 * adapter family file that sets up function pointers for the rest of
+	 * the functions in that family.
+	 */
+	switch (hw->mac.type) {
+	case e1000_82575:
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+		e1000_init_function_pointers_82575(hw);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		e1000_init_function_pointers_i210(hw);
+		break;
+	default:
+		DEBUGOUT("Hardware not supported\n");
+		ret_val = -E1000_ERR_CONFIG;
+		break;
+	}
+
+	/*
+	 * Initialize the rest of the function pointers. These require some
+	 * register reads/writes in some cases.
+	 */
+	if (!(ret_val) && init_device) {
+		ret_val = e1000_init_mac_params(hw);
+		if (ret_val)
+			goto out;
+
+		ret_val = e1000_init_nvm_params(hw);
+		if (ret_val)
+			goto out;
+
+		ret_val = e1000_init_phy_params(hw);
+		if (ret_val)
+			goto out;
+
+		ret_val = e1000_init_mbx_params(hw);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_get_bus_info - Obtain bus information for adapter
+ *  @hw: pointer to the HW structure
+ *
+ *  This will obtain information about the HW bus for which the
+ *  adapter is attached and stores it in the hw structure. This is a
+ *  function pointer entry point called by drivers.
+ **/
+s32 e1000_get_bus_info(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.get_bus_info)
+		return hw->mac.ops.get_bus_info(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_clear_vfta - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  This clears the VLAN filter table on the adapter. This is a function
+ *  pointer entry point called by drivers.
+ **/
+void e1000_clear_vfta(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.clear_vfta)
+		hw->mac.ops.clear_vfta(hw);
+}
+
+/**
+ *  e1000_write_vfta - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: the 32-bit offset in which to write the value to.
+ *  @value: the 32-bit value to write at location offset.
+ *
+ *  This writes a 32-bit value to a 32-bit offset in the VLAN filter
+ *  table. This is a function pointer entry point called by drivers.
+ **/
+void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	if (hw->mac.ops.write_vfta)
+		hw->mac.ops.write_vfta(hw, offset, value);
+}
+
+/**
+ *  e1000_update_mc_addr_list - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates the Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
+			       u32 mc_addr_count)
+{
+	if (hw->mac.ops.update_mc_addr_list)
+		hw->mac.ops.update_mc_addr_list(hw, mc_addr_list,
+						mc_addr_count);
+}
+
+/**
+ *  e1000_force_mac_fc - Force MAC flow control
+ *  @hw: pointer to the HW structure
+ *
+ *  Force the MAC's flow control settings. Currently no func pointer exists
+ *  and all implementations are handled in the generic version of this
+ *  function.
+ **/
+s32 e1000_force_mac_fc(struct e1000_hw *hw)
+{
+	return e1000_force_mac_fc_generic(hw);
+}
+
+/**
+ *  e1000_check_for_link - Check/Store link connection
+ *  @hw: pointer to the HW structure
+ *
+ *  This checks the link condition of the adapter and stores the
+ *  results in the hw->mac structure. This is a function pointer entry
+ *  point called by drivers.
+ **/
+s32 e1000_check_for_link(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.check_for_link)
+		return hw->mac.ops.check_for_link(hw);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_check_mng_mode - Check management mode
+ *  @hw: pointer to the HW structure
+ *
+ *  This checks if the adapter has manageability enabled.
+ *  This is a function pointer entry point called by drivers.
+ **/
+bool e1000_check_mng_mode(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.check_mng_mode)
+		return hw->mac.ops.check_mng_mode(hw);
+
+	return false;
+}
+
+/**
+ *  e1000_mng_write_dhcp_info - Writes DHCP info to host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: pointer to the host interface
+ *  @length: size of the buffer
+ *
+ *  Writes the DHCP information to the host interface.
+ **/
+s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length)
+{
+	return e1000_mng_write_dhcp_info_generic(hw, buffer, length);
+}
+
+/**
+ *  e1000_reset_hw - Reset hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets the hardware into a known state. This is a function pointer
+ *  entry point called by drivers.
+ **/
+s32 e1000_reset_hw(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.reset_hw)
+		return hw->mac.ops.reset_hw(hw);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_init_hw - Initialize hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This inits the hardware readying it for operation. This is a function
+ *  pointer entry point called by drivers.
+ **/
+s32 e1000_init_hw(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.init_hw)
+		return hw->mac.ops.init_hw(hw);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_setup_link - Configures link and flow control
+ *  @hw: pointer to the HW structure
+ *
+ *  This configures link and flow control settings for the adapter. This
+ *  is a function pointer entry point called by drivers. While modules can
+ *  also call this, they probably call their own version of this function.
+ **/
+s32 e1000_setup_link(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.setup_link)
+		return hw->mac.ops.setup_link(hw);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_get_speed_and_duplex - Returns current speed and duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: pointer to a 16-bit value to store the speed
+ *  @duplex: pointer to a 16-bit value to store the duplex.
+ *
+ *  This returns the speed and duplex of the adapter in the two 'out'
+ *  variables passed in. This is a function pointer entry point called
+ *  by drivers.
+ **/
+s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex)
+{
+	if (hw->mac.ops.get_link_up_info)
+		return hw->mac.ops.get_link_up_info(hw, speed, duplex);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_setup_led - Configures SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  This prepares the SW controllable LED for use and saves the current state
+ *  of the LED so it can be later restored. This is a function pointer entry
+ *  point called by drivers.
+ **/
+s32 e1000_setup_led(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.setup_led)
+		return hw->mac.ops.setup_led(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_cleanup_led - Restores SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  This restores the SW controllable LED to the value saved off by
+ *  e1000_setup_led. This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_cleanup_led(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.cleanup_led)
+		return hw->mac.ops.cleanup_led(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_blink_led - Blink SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  This starts the adapter LED blinking. Request the LED to be setup first
+ *  and cleaned up after. This is a function pointer entry point called by
+ *  drivers.
+ **/
+s32 e1000_blink_led(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.blink_led)
+		return hw->mac.ops.blink_led(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_id_led_init - store LED configurations in SW
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes the LED config in SW. This is a function pointer entry point
+ *  called by drivers.
+ **/
+s32 e1000_id_led_init(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.id_led_init)
+		return hw->mac.ops.id_led_init(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_led_on - Turn on SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  Turns the SW defined LED on. This is a function pointer entry point
+ *  called by drivers.
+ **/
+s32 e1000_led_on(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.led_on)
+		return hw->mac.ops.led_on(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_led_off - Turn off SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  Turns the SW defined LED off. This is a function pointer entry point
+ *  called by drivers.
+ **/
+s32 e1000_led_off(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.led_off)
+		return hw->mac.ops.led_off(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_reset_adaptive - Reset adaptive IFS
+ *  @hw: pointer to the HW structure
+ *
+ *  Resets the adaptive IFS. Currently no func pointer exists and all
+ *  implementations are handled in the generic version of this function.
+ **/
+void e1000_reset_adaptive(struct e1000_hw *hw)
+{
+	e1000_reset_adaptive_generic(hw);
+}
+
+/**
+ *  e1000_update_adaptive - Update adaptive IFS
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates adapter IFS. Currently no func pointer exists and all
+ *  implementations are handled in the generic version of this function.
+ **/
+void e1000_update_adaptive(struct e1000_hw *hw)
+{
+	e1000_update_adaptive_generic(hw);
+}
+
+/**
+ *  e1000_disable_pcie_master - Disable PCI-Express master access
+ *  @hw: pointer to the HW structure
+ *
+ *  Disables PCI-Express master access and verifies there are no pending
+ *  requests. Currently no func pointer exists and all implementations are
+ *  handled in the generic version of this function.
+ **/
+s32 e1000_disable_pcie_master(struct e1000_hw *hw)
+{
+	return e1000_disable_pcie_master_generic(hw);
+}
+
+/**
+ *  e1000_config_collision_dist - Configure collision distance
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the collision distance to the default value and is used
+ *  during link setup.
+ **/
+void e1000_config_collision_dist(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.config_collision_dist)
+		hw->mac.ops.config_collision_dist(hw);
+}
+
+/**
+ *  e1000_rar_set - Sets a receive address register
+ *  @hw: pointer to the HW structure
+ *  @addr: address to set the RAR to
+ *  @index: the RAR to set
+ *
+ *  Sets a Receive Address Register (RAR) to the specified address.
+ **/
+void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	if (hw->mac.ops.rar_set)
+		hw->mac.ops.rar_set(hw, addr, index);
+}
+
+/**
+ *  e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state
+ *  @hw: pointer to the HW structure
+ *
+ *  Ensures that the MDI/MDIX SW state is valid.
+ **/
+s32 e1000_validate_mdi_setting(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.validate_mdi_setting)
+		return hw->mac.ops.validate_mdi_setting(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_hash_mc_addr - Determines address location in multicast table
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: Multicast address to hash.
+ *
+ *  This hashes an address to determine its location in the multicast
+ *  table. Currently no func pointer exists and all implementations
+ *  are handled in the generic version of this function.
+ **/
+u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+{
+	return e1000_hash_mc_addr_generic(hw, mc_addr);
+}
+
+/**
+ *  e1000_enable_tx_pkt_filtering - Enable packet filtering on TX
+ *  @hw: pointer to the HW structure
+ *
+ *  Enables packet filtering on transmit packets if manageability is enabled
+ *  and host interface is enabled.
+ *  Currently no func pointer exists and all implementations are handled in the
+ *  generic version of this function.
+ **/
+bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw)
+{
+	return e1000_enable_tx_pkt_filtering_generic(hw);
+}
+
+/**
+ *  e1000_mng_host_if_write - Writes to the manageability host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: pointer to the host interface buffer
+ *  @length: size of the buffer
+ *  @offset: location in the buffer to write to
+ *  @sum: sum of the data (not checksum)
+ *
+ *  This function writes the buffer content at the offset given on the host if.
+ *  It also does alignment considerations to do the writes in most efficient
+ *  way.  Also fills up the sum of the buffer in *buffer parameter.
+ **/
+s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
+			    u16 offset, u8 *sum)
+{
+	return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum);
+}
+
+/**
+ *  e1000_mng_write_cmd_header - Writes manageability command header
+ *  @hw: pointer to the HW structure
+ *  @hdr: pointer to the host interface command header
+ *
+ *  Writes the command header after does the checksum calculation.
+ **/
+s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
+			       struct e1000_host_mng_command_header *hdr)
+{
+	return e1000_mng_write_cmd_header_generic(hw, hdr);
+}
+
+/**
+ *  e1000_mng_enable_host_if - Checks host interface is enabled
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ *
+ *  This function checks whether the HOST IF is enabled for command operation
+ *  and also checks whether the previous command is completed.  It busy waits
+ *  in case of previous command is not completed.
+ **/
+s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
+{
+	return e1000_mng_enable_host_if_generic(hw);
+}
+
+/**
+ *  e1000_check_reset_block - Verifies PHY can be reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks if the PHY is in a state that can be reset or if manageability
+ *  has it tied up. This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_check_reset_block(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.check_reset_block)
+		return hw->phy.ops.check_reset_block(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_phy_reg - Reads PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: the register to read
+ *  @data: the buffer to store the 16-bit read.
+ *
+ *  Reads the PHY register and returns the value in data.
+ *  This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	if (hw->phy.ops.read_reg)
+		return hw->phy.ops.read_reg(hw, offset, data);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_phy_reg - Writes PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: the register to write
+ *  @data: the value to write.
+ *
+ *  Writes the PHY register at offset with the value in data.
+ *  This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	if (hw->phy.ops.write_reg)
+		return hw->phy.ops.write_reg(hw, offset, data);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_release_phy - Generic release PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Return if silicon family does not require a semaphore when accessing the
+ *  PHY.
+ **/
+void e1000_release_phy(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.release)
+		hw->phy.ops.release(hw);
+}
+
+/**
+ *  e1000_acquire_phy - Generic acquire PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Return success if silicon family does not require a semaphore when
+ *  accessing the PHY.
+ **/
+s32 e1000_acquire_phy(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.acquire)
+		return hw->phy.ops.acquire(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_kmrn_reg - Reads register using Kumeran interface
+ *  @hw: pointer to the HW structure
+ *  @offset: the register to read
+ *  @data: the location to store the 16-bit value read.
+ *
+ *  Reads a register out of the Kumeran interface. Currently no func pointer
+ *  exists and all implementations are handled in the generic version of
+ *  this function.
+ **/
+s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return e1000_read_kmrn_reg_generic(hw, offset, data);
+}
+
+/**
+ *  e1000_write_kmrn_reg - Writes register using Kumeran interface
+ *  @hw: pointer to the HW structure
+ *  @offset: the register to write
+ *  @data: the value to write.
+ *
+ *  Writes a register to the Kumeran interface. Currently no func pointer
+ *  exists and all implementations are handled in the generic version of
+ *  this function.
+ **/
+s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return e1000_write_kmrn_reg_generic(hw, offset, data);
+}
+
+/**
+ *  e1000_get_cable_length - Retrieves cable length estimation
+ *  @hw: pointer to the HW structure
+ *
+ *  This function estimates the cable length and stores them in
+ *  hw->phy.min_length and hw->phy.max_length. This is a function pointer
+ *  entry point called by drivers.
+ **/
+s32 e1000_get_cable_length(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.get_cable_length)
+		return hw->phy.ops.get_cable_length(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_phy_info - Retrieves PHY information from registers
+ *  @hw: pointer to the HW structure
+ *
+ *  This function gets some information from various PHY registers and
+ *  populates hw->phy values with it. This is a function pointer entry
+ *  point called by drivers.
+ **/
+s32 e1000_get_phy_info(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.get_info)
+		return hw->phy.ops.get_info(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_phy_hw_reset - Hard PHY reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Performs a hard PHY reset. This is a function pointer entry point called
+ *  by drivers.
+ **/
+s32 e1000_phy_hw_reset(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.reset)
+		return hw->phy.ops.reset(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_phy_commit - Soft PHY reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Performs a soft PHY reset on those that apply. This is a function pointer
+ *  entry point called by drivers.
+ **/
+s32 e1000_phy_commit(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.commit)
+		return hw->phy.ops.commit(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_set_d0_lplu_state - Sets low power link up state for D0
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D0
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D0
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.  This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active)
+{
+	if (hw->phy.ops.set_d0_lplu_state)
+		return hw->phy.ops.set_d0_lplu_state(hw, active);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_set_d3_lplu_state - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D3
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.  This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
+{
+	if (hw->phy.ops.set_d3_lplu_state)
+		return hw->phy.ops.set_d3_lplu_state(hw, active);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_mac_addr - Reads MAC address
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the MAC address out of the adapter and stores it in the HW structure.
+ *  Currently no func pointer exists and all implementations are handled in the
+ *  generic version of this function.
+ **/
+s32 e1000_read_mac_addr(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.read_mac_addr)
+		return hw->mac.ops.read_mac_addr(hw);
+
+	return e1000_read_mac_addr_generic(hw);
+}
+
+/**
+ *  e1000_read_pba_string - Read device part number string
+ *  @hw: pointer to the HW structure
+ *  @pba_num: pointer to device part number
+ *  @pba_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number from the EEPROM and stores
+ *  the value in pba_num.
+ *  Currently no func pointer exists and all implementations are handled in the
+ *  generic version of this function.
+ **/
+s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+	return e1000_read_pba_string_generic(hw, pba_num, pba_num_size);
+}
+
+/**
+ *  e1000_read_pba_length - Read device part number string length
+ *  @hw: pointer to the HW structure
+ *  @pba_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number length from the EEPROM and
+ *  stores the value in pba_num.
+ *  Currently no func pointer exists and all implementations are handled in the
+ *  generic version of this function.
+ **/
+s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size)
+{
+	return e1000_read_pba_length_generic(hw, pba_num_size);
+}
+
+/**
+ *  e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Validates the NVM checksum is correct. This is a function pointer entry
+ *  point called by drivers.
+ **/
+s32 e1000_validate_nvm_checksum(struct e1000_hw *hw)
+{
+	if (hw->nvm.ops.validate)
+		return hw->nvm.ops.validate(hw);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the NVM checksum. Currently no func pointer exists and all
+ *  implementations are handled in the generic version of this function.
+ **/
+s32 e1000_update_nvm_checksum(struct e1000_hw *hw)
+{
+	if (hw->nvm.ops.update)
+		return hw->nvm.ops.update(hw);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_reload_nvm - Reloads EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
+ *  extended control register.
+ **/
+void e1000_reload_nvm(struct e1000_hw *hw)
+{
+	if (hw->nvm.ops.reload)
+		hw->nvm.ops.reload(hw);
+}
+
+/**
+ *  e1000_read_nvm - Reads NVM (EEPROM)
+ *  @hw: pointer to the HW structure
+ *  @offset: the word offset to read
+ *  @words: number of 16-bit words to read
+ *  @data: pointer to the properly sized buffer for the data.
+ *
+ *  Reads 16-bit chunks of data from the NVM (EEPROM). This is a function
+ *  pointer entry point called by drivers.
+ **/
+s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	if (hw->nvm.ops.read)
+		return hw->nvm.ops.read(hw, offset, words, data);
+
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_write_nvm - Writes to NVM (EEPROM)
+ *  @hw: pointer to the HW structure
+ *  @offset: the word offset to read
+ *  @words: number of 16-bit words to write
+ *  @data: pointer to the properly sized buffer for the data.
+ *
+ *  Writes 16-bit chunks of data to the NVM (EEPROM). This is a function
+ *  pointer entry point called by drivers.
+ **/
+s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	if (hw->nvm.ops.write)
+		return hw->nvm.ops.write(hw, offset, words, data);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_8bit_ctrl_reg - Writes 8bit Control register
+ *  @hw: pointer to the HW structure
+ *  @reg: 32bit register offset
+ *  @offset: the register to write
+ *  @data: the value to write.
+ *
+ *  Writes the PHY register at offset with the value in data.
+ *  This is a function pointer entry point called by drivers.
+ **/
+s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
+			      u8 data)
+{
+	return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data);
+}
+
+/**
+ * e1000_power_up_phy - Restores link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * The phy may be powered down to save power, to turn off link when the
+ * driver is unloaded, or wake on lan is not enabled (among others).
+ **/
+void e1000_power_up_phy(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.power_up)
+		hw->phy.ops.power_up(hw);
+
+	e1000_setup_link(hw);
+}
+
+/**
+ * e1000_power_down_phy - Power down PHY
+ * @hw: pointer to the HW structure
+ *
+ * The phy may be powered down to save power, to turn off link when the
+ * driver is unloaded, or wake on lan is not enabled (among others).
+ **/
+void e1000_power_down_phy(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.power_down)
+		hw->phy.ops.power_down(hw);
+}
+
+/**
+ *  e1000_power_up_fiber_serdes_link - Power up serdes link
+ *  @hw: pointer to the HW structure
+ *
+ *  Power on the optics and PCS.
+ **/
+void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.power_up_serdes)
+		hw->mac.ops.power_up_serdes(hw);
+}
+
+/**
+ *  e1000_shutdown_fiber_serdes_link - Remove link during power down
+ *  @hw: pointer to the HW structure
+ *
+ *  Shutdown the optics and PCS on driver unload.
+ **/
+void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.shutdown_serdes)
+		hw->mac.ops.shutdown_serdes(hw);
+}
+
+/**
+ *  e1000_get_thermal_sensor_data - Gathers thermal sensor data
+ *  @hw: pointer to hardware structure
+ *
+ *  Updates the temperatures in mac.thermal_sensor_data
+ **/
+s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.get_thermal_sensor_data)
+		return hw->mac.ops.get_thermal_sensor_data(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_init_thermal_sensor_thresh - Sets thermal sensor thresholds
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the thermal sensor thresholds according to the NVM map
+ **/
+s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.init_thermal_sensor_thresh)
+		return hw->mac.ops.init_thermal_sensor_thresh(hw);
+
+	return E1000_SUCCESS;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
new file mode 100644
index 00000000..b21294ec
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
@@ -0,0 +1,157 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_API_H_
+#define _E1000_API_H_
+
+#include "e1000_hw.h"
+
+extern void e1000_init_function_pointers_82575(struct e1000_hw *hw);
+extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw);
+extern void e1000_init_function_pointers_vf(struct e1000_hw *hw);
+extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw);
+extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw);
+extern void e1000_init_function_pointers_i210(struct e1000_hw *hw);
+
+s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr);
+s32 e1000_set_mac_type(struct e1000_hw *hw);
+s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device);
+s32 e1000_init_mac_params(struct e1000_hw *hw);
+s32 e1000_init_nvm_params(struct e1000_hw *hw);
+s32 e1000_init_phy_params(struct e1000_hw *hw);
+s32 e1000_init_mbx_params(struct e1000_hw *hw);
+s32 e1000_get_bus_info(struct e1000_hw *hw);
+void e1000_clear_vfta(struct e1000_hw *hw);
+void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value);
+s32 e1000_force_mac_fc(struct e1000_hw *hw);
+s32 e1000_check_for_link(struct e1000_hw *hw);
+s32 e1000_reset_hw(struct e1000_hw *hw);
+s32 e1000_init_hw(struct e1000_hw *hw);
+s32 e1000_setup_link(struct e1000_hw *hw);
+s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex);
+s32 e1000_disable_pcie_master(struct e1000_hw *hw);
+void e1000_config_collision_dist(struct e1000_hw *hw);
+void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index);
+u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr);
+void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
+			       u32 mc_addr_count);
+s32 e1000_setup_led(struct e1000_hw *hw);
+s32 e1000_cleanup_led(struct e1000_hw *hw);
+s32 e1000_check_reset_block(struct e1000_hw *hw);
+s32 e1000_blink_led(struct e1000_hw *hw);
+s32 e1000_led_on(struct e1000_hw *hw);
+s32 e1000_led_off(struct e1000_hw *hw);
+s32 e1000_id_led_init(struct e1000_hw *hw);
+void e1000_reset_adaptive(struct e1000_hw *hw);
+void e1000_update_adaptive(struct e1000_hw *hw);
+s32 e1000_get_cable_length(struct e1000_hw *hw);
+s32 e1000_validate_mdi_setting(struct e1000_hw *hw);
+s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset,
+			      u8 data);
+s32 e1000_get_phy_info(struct e1000_hw *hw);
+void e1000_release_phy(struct e1000_hw *hw);
+s32 e1000_acquire_phy(struct e1000_hw *hw);
+s32 e1000_phy_hw_reset(struct e1000_hw *hw);
+s32 e1000_phy_commit(struct e1000_hw *hw);
+void e1000_power_up_phy(struct e1000_hw *hw);
+void e1000_power_down_phy(struct e1000_hw *hw);
+s32 e1000_read_mac_addr(struct e1000_hw *hw);
+s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size);
+s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size);
+void e1000_reload_nvm(struct e1000_hw *hw);
+s32 e1000_update_nvm_checksum(struct e1000_hw *hw);
+s32 e1000_validate_nvm_checksum(struct e1000_hw *hw);
+s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active);
+s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active);
+bool e1000_check_mng_mode(struct e1000_hw *hw);
+bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw);
+s32 e1000_mng_enable_host_if(struct e1000_hw *hw);
+s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length,
+			    u16 offset, u8 *sum);
+s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
+			       struct e1000_host_mng_command_header *hdr);
+s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length);
+s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw);
+s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw);
+
+
+
+/*
+ * TBI_ACCEPT macro definition:
+ *
+ * This macro requires:
+ *      adapter = a pointer to struct e1000_hw
+ *      status = the 8 bit status field of the Rx descriptor with EOP set
+ *      error = the 8 bit error field of the Rx descriptor with EOP set
+ *      length = the sum of all the length fields of the Rx descriptors that
+ *               make up the current frame
+ *      last_byte = the last byte of the frame DMAed by the hardware
+ *      max_frame_length = the maximum frame length we want to accept.
+ *      min_frame_length = the minimum frame length we want to accept.
+ *
+ * This macro is a conditional that should be used in the interrupt
+ * handler's Rx processing routine when RxErrors have been detected.
+ *
+ * Typical use:
+ *  ...
+ *  if (TBI_ACCEPT) {
+ *      accept_frame = true;
+ *      e1000_tbi_adjust_stats(adapter, MacAddress);
+ *      frame_length--;
+ *  } else {
+ *      accept_frame = false;
+ *  }
+ *  ...
+ */
+
+/* The carrier extension symbol, as received by the NIC. */
+#define CARRIER_EXTENSION   0x0F
+
+#define TBI_ACCEPT(a, status, errors, length, last_byte, \
+		   min_frame_size, max_frame_size) \
+	(e1000_tbi_sbp_enabled_82543(a) && \
+	 (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
+	 ((last_byte) == CARRIER_EXTENSION) && \
+	 (((status) & E1000_RXD_STAT_VP) ? \
+	  (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \
+	  ((length) <= (max_frame_size + 1))) : \
+	  (((length) > min_frame_size) && \
+	  ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1)))))
+
+#ifndef E1000_MAX
+#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#ifndef E1000_DIVIDE_ROUND_UP
+#define E1000_DIVIDE_ROUND_UP(a, b)	(((a) + (b) - 1) / (b)) /* ceil(a/b) */
+#endif
+#endif /* _E1000_API_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
new file mode 100644
index 00000000..63b228c5
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
@@ -0,0 +1,1380 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_DEFINES_H_
+#define _E1000_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE  8
+#define REQ_RX_DESCRIPTOR_MULTIPLE  8
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_APME		0x00000001 /* APM Enable */
+#define E1000_WUC_PME_EN	0x00000002 /* PME Enable */
+#define E1000_WUC_PME_STATUS	0x00000004 /* PME Status */
+#define E1000_WUC_APMPME	0x00000008 /* Assert PME on APM Wakeup */
+#define E1000_WUC_PHY_WAKE	0x00000100 /* if PHY supports wakeup */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC	0x00000001 /* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG	0x00000002 /* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX	0x00000004 /* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC	0x00000008 /* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC	0x00000010 /* Broadcast Wakeup Enable */
+#define E1000_WUFC_ARP	0x00000020 /* ARP Request Packet Wakeup Enable */
+#define E1000_WUFC_IPV4	0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define E1000_WUFC_FLX0		0x00010000 /* Flexible Filter 0 Enable */
+
+/* Wake Up Status */
+#define E1000_WUS_LNKC		E1000_WUFC_LNKC
+#define E1000_WUS_MAG		E1000_WUFC_MAG
+#define E1000_WUS_EX		E1000_WUFC_EX
+#define E1000_WUS_MC		E1000_WUFC_MC
+#define E1000_WUS_BC		E1000_WUFC_BC
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_SDP4_DATA	0x00000010 /* SW Definable Pin 4 data */
+#define E1000_CTRL_EXT_SDP6_DATA	0x00000040 /* SW Definable Pin 6 data */
+#define E1000_CTRL_EXT_SDP3_DATA	0x00000080 /* SW Definable Pin 3 data */
+#define E1000_CTRL_EXT_SDP6_DIR	0x00000400 /* Direction of SDP6 0=in 1=out */
+#define E1000_CTRL_EXT_SDP3_DIR	0x00000800 /* Direction of SDP3 0=in 1=out */
+#define E1000_CTRL_EXT_EE_RST	0x00002000 /* Reinitialize from EEPROM */
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD	0x00004000
+#define E1000_CTRL_EXT_SPD_BYPS	0x00008000 /* Speed Select Bypass */
+#define E1000_CTRL_EXT_RO_DIS	0x00020000 /* Relaxed Ordering disable */
+#define E1000_CTRL_EXT_DMA_DYN_CLK_EN	0x00080000 /* DMA Dynamic Clk Gating */
+#define E1000_CTRL_EXT_LINK_MODE_MASK	0x00C00000
+/* Offset of the link mode field in Ctrl Ext register */
+#define E1000_CTRL_EXT_LINK_MODE_OFFSET	22
+#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX	0x00400000
+#define E1000_CTRL_EXT_LINK_MODE_GMII	0x00000000
+#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES	0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_SGMII	0x00800000
+#define E1000_CTRL_EXT_EIAME		0x01000000
+#define E1000_CTRL_EXT_IRCA		0x00000001
+#define E1000_CTRL_EXT_DRV_LOAD		0x10000000 /* Drv loaded bit for FW */
+#define E1000_CTRL_EXT_IAME		0x08000000 /* Int ACK Auto-mask */
+#define E1000_CTRL_EXT_PBA_CLR		0x80000000 /* PBA Clear */
+#define E1000_I2CCMD_REG_ADDR_SHIFT	16
+#define E1000_I2CCMD_PHY_ADDR_SHIFT	24
+#define E1000_I2CCMD_OPCODE_READ	0x08000000
+#define E1000_I2CCMD_OPCODE_WRITE	0x00000000
+#define E1000_I2CCMD_READY		0x20000000
+#define E1000_I2CCMD_ERROR		0x80000000
+#define E1000_I2CCMD_SFP_DATA_ADDR(a)	(0x0000 + (a))
+#define E1000_I2CCMD_SFP_DIAG_ADDR(a)	(0x0100 + (a))
+#define E1000_MAX_SGMII_PHY_REG_ADDR	255
+#define E1000_I2CCMD_PHY_TIMEOUT	200
+#define E1000_IVAR_VALID	0x80
+#define E1000_GPIE_NSICR	0x00000001
+#define E1000_GPIE_MSIX_MODE	0x00000010
+#define E1000_GPIE_EIAME	0x40000000
+#define E1000_GPIE_PBA		0x80000000
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD	0x01    /* Descriptor Done */
+#define E1000_RXD_STAT_EOP	0x02    /* End of Packet */
+#define E1000_RXD_STAT_IXSM	0x04    /* Ignore checksum */
+#define E1000_RXD_STAT_VP	0x08    /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_UDPCS	0x10    /* UDP xsum calculated */
+#define E1000_RXD_STAT_TCPCS	0x20    /* TCP xsum calculated */
+#define E1000_RXD_STAT_IPCS	0x40    /* IP xsum calculated */
+#define E1000_RXD_STAT_PIF	0x80    /* passed in-exact filter */
+#define E1000_RXD_STAT_IPIDV	0x200   /* IP identification valid */
+#define E1000_RXD_STAT_UDPV	0x400   /* Valid UDP checksum */
+#define E1000_RXD_STAT_DYNINT	0x800   /* Pkt caused INT via DYNINT */
+#define E1000_RXD_ERR_CE	0x01    /* CRC Error */
+#define E1000_RXD_ERR_SE	0x02    /* Symbol Error */
+#define E1000_RXD_ERR_SEQ	0x04    /* Sequence Error */
+#define E1000_RXD_ERR_CXE	0x10    /* Carrier Extension Error */
+#define E1000_RXD_ERR_TCPE	0x20    /* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE	0x40    /* IP Checksum Error */
+#define E1000_RXD_ERR_RXE	0x80    /* Rx Data Error */
+#define E1000_RXD_SPC_VLAN_MASK	0x0FFF  /* VLAN ID is in lower 12 bits */
+
+#define E1000_RXDEXT_STATERR_TST	0x00000100 /* Time Stamp taken */
+#define E1000_RXDEXT_STATERR_LB		0x00040000
+#define E1000_RXDEXT_STATERR_CE		0x01000000
+#define E1000_RXDEXT_STATERR_SE		0x02000000
+#define E1000_RXDEXT_STATERR_SEQ	0x04000000
+#define E1000_RXDEXT_STATERR_CXE	0x10000000
+#define E1000_RXDEXT_STATERR_TCPE	0x20000000
+#define E1000_RXDEXT_STATERR_IPE	0x40000000
+#define E1000_RXDEXT_STATERR_RXE	0x80000000
+
+/* mask to determine if packets should be dropped due to frame errors */
+#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+	E1000_RXD_ERR_CE  |		\
+	E1000_RXD_ERR_SE  |		\
+	E1000_RXD_ERR_SEQ |		\
+	E1000_RXD_ERR_CXE |		\
+	E1000_RXD_ERR_RXE)
+
+/* Same mask, but for extended and packet split descriptors */
+#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
+	E1000_RXDEXT_STATERR_CE  |	\
+	E1000_RXDEXT_STATERR_SE  |	\
+	E1000_RXDEXT_STATERR_SEQ |	\
+	E1000_RXDEXT_STATERR_CXE |	\
+	E1000_RXDEXT_STATERR_RXE)
+
+#define E1000_MRQC_RSS_FIELD_MASK		0xFFFF0000
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP		0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4		0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX	0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6		0x00100000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP		0x00200000
+
+#define E1000_RXDPS_HDRSTAT_HDRSP		0x00008000
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN	0x00000001 /* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN	0x00000002 /* ASF Enabled - RO */
+#define E1000_MANC_ARP_EN	0x00002000 /* Enable ARP Request Filtering */
+#define E1000_MANC_RCV_TCO_EN	0x00020000 /* Receive TCO Packets Enabled */
+#define E1000_MANC_BLK_PHY_RST_ON_IDE	0x00040000 /* Block phy resets */
+/* Enable MAC address filtering */
+#define E1000_MANC_EN_MAC_ADDR_FILTER	0x00100000
+/* Enable MNG packets to host memory */
+#define E1000_MANC_EN_MNG2HOST		0x00200000
+
+#define E1000_MANC2H_PORT_623		0x00000020 /* Port 0x26f */
+#define E1000_MANC2H_PORT_664		0x00000040 /* Port 0x298 */
+#define E1000_MDEF_PORT_623		0x00000800 /* Port 0x26f */
+#define E1000_MDEF_PORT_664		0x00000400 /* Port 0x298 */
+
+/* Receive Control */
+#define E1000_RCTL_RST		0x00000001 /* Software reset */
+#define E1000_RCTL_EN		0x00000002 /* enable */
+#define E1000_RCTL_SBP		0x00000004 /* store bad packet */
+#define E1000_RCTL_UPE		0x00000008 /* unicast promisc enable */
+#define E1000_RCTL_MPE		0x00000010 /* multicast promisc enable */
+#define E1000_RCTL_LPE		0x00000020 /* long packet enable */
+#define E1000_RCTL_LBM_NO	0x00000000 /* no loopback mode */
+#define E1000_RCTL_LBM_MAC	0x00000040 /* MAC loopback mode */
+#define E1000_RCTL_LBM_TCVR	0x000000C0 /* tcvr loopback mode */
+#define E1000_RCTL_DTYP_PS	0x00000400 /* Packet Split descriptor */
+#define E1000_RCTL_RDMTS_HALF	0x00000000 /* Rx desc min thresh size */
+#define E1000_RCTL_MO_SHIFT	12 /* multicast offset shift */
+#define E1000_RCTL_MO_3		0x00003000 /* multicast offset 15:4 */
+#define E1000_RCTL_BAM		0x00008000 /* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048	0x00000000 /* Rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024	0x00010000 /* Rx buffer size 1024 */
+#define E1000_RCTL_SZ_512	0x00020000 /* Rx buffer size 512 */
+#define E1000_RCTL_SZ_256	0x00030000 /* Rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384	0x00010000 /* Rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192	0x00020000 /* Rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096	0x00030000 /* Rx buffer size 4096 */
+#define E1000_RCTL_VFE		0x00040000 /* vlan filter enable */
+#define E1000_RCTL_CFIEN	0x00080000 /* canonical form enable */
+#define E1000_RCTL_CFI		0x00100000 /* canonical form indicator */
+#define E1000_RCTL_DPF		0x00400000 /* discard pause frames */
+#define E1000_RCTL_PMCF		0x00800000 /* pass MAC control frames */
+#define E1000_RCTL_BSEX		0x02000000 /* Buffer size extension */
+#define E1000_RCTL_SECRC	0x04000000 /* Strip Ethernet CRC */
+
+/* Use byte values for the following shift parameters
+ * Usage:
+ *     psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
+ *		  E1000_PSRCTL_BSIZE0_MASK) |
+ *		((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
+ *		  E1000_PSRCTL_BSIZE1_MASK) |
+ *		((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
+ *		  E1000_PSRCTL_BSIZE2_MASK) |
+ *		((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |;
+ *		  E1000_PSRCTL_BSIZE3_MASK))
+ * where value0 = [128..16256],  default=256
+ *       value1 = [1024..64512], default=4096
+ *       value2 = [0..64512],    default=4096
+ *       value3 = [0..64512],    default=0
+ */
+
+#define E1000_PSRCTL_BSIZE0_MASK	0x0000007F
+#define E1000_PSRCTL_BSIZE1_MASK	0x00003F00
+#define E1000_PSRCTL_BSIZE2_MASK	0x003F0000
+#define E1000_PSRCTL_BSIZE3_MASK	0x3F000000
+
+#define E1000_PSRCTL_BSIZE0_SHIFT	7    /* Shift _right_ 7 */
+#define E1000_PSRCTL_BSIZE1_SHIFT	2    /* Shift _right_ 2 */
+#define E1000_PSRCTL_BSIZE2_SHIFT	6    /* Shift _left_ 6 */
+#define E1000_PSRCTL_BSIZE3_SHIFT	14   /* Shift _left_ 14 */
+
+/* SWFW_SYNC Definitions */
+#define E1000_SWFW_EEP_SM	0x01
+#define E1000_SWFW_PHY0_SM	0x02
+#define E1000_SWFW_PHY1_SM	0x04
+#define E1000_SWFW_CSR_SM	0x08
+#define E1000_SWFW_PHY2_SM	0x20
+#define E1000_SWFW_PHY3_SM	0x40
+#define E1000_SWFW_SW_MNG_SM	0x400
+
+/* Device Control */
+#define E1000_CTRL_FD		0x00000001  /* Full duplex.0=half; 1=full */
+#define E1000_CTRL_PRIOR	0x00000004  /* Priority on PCI. 0=rx,1=fair */
+#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */
+#define E1000_CTRL_LRST		0x00000008  /* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_ASDE		0x00000020  /* Auto-speed detect enable */
+#define E1000_CTRL_SLU		0x00000040  /* Set link up (Force Link) */
+#define E1000_CTRL_ILOS		0x00000080  /* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL	0x00000300  /* Speed Select Mask */
+#define E1000_CTRL_SPD_10	0x00000000  /* Force 10Mb */
+#define E1000_CTRL_SPD_100	0x00000100  /* Force 100Mb */
+#define E1000_CTRL_SPD_1000	0x00000200  /* Force 1Gb */
+#define E1000_CTRL_FRCSPD	0x00000800  /* Force Speed */
+#define E1000_CTRL_FRCDPX	0x00001000  /* Force Duplex */
+#define E1000_CTRL_SWDPIN0	0x00040000 /* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1	0x00080000 /* SWDPIN 1 value */
+#define E1000_CTRL_SWDPIN2	0x00100000 /* SWDPIN 2 value */
+#define E1000_CTRL_ADVD3WUC	0x00100000 /* D3 WUC */
+#define E1000_CTRL_SWDPIN3	0x00200000 /* SWDPIN 3 value */
+#define E1000_CTRL_SWDPIO0	0x00400000 /* SWDPIN 0 Input or output */
+#define E1000_CTRL_RST		0x04000000 /* Global reset */
+#define E1000_CTRL_RFCE		0x08000000 /* Receive Flow Control enable */
+#define E1000_CTRL_TFCE		0x10000000 /* Transmit flow control enable */
+#define E1000_CTRL_VME		0x40000000 /* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST	0x80000000 /* PHY Reset */
+#define E1000_CTRL_I2C_ENA	0x02000000 /* I2C enable */
+
+
+#define E1000_CONNSW_ENRGSRC		0x4
+#define E1000_CONNSW_PHYSD		0x400
+#define E1000_CONNSW_PHY_PDN		0x800
+#define E1000_CONNSW_SERDESD		0x200
+#define E1000_CONNSW_AUTOSENSE_CONF	0x2
+#define E1000_CONNSW_AUTOSENSE_EN	0x1
+#define E1000_PCS_CFG_PCS_EN		8
+#define E1000_PCS_LCTL_FLV_LINK_UP	1
+#define E1000_PCS_LCTL_FSV_10		0
+#define E1000_PCS_LCTL_FSV_100		2
+#define E1000_PCS_LCTL_FSV_1000		4
+#define E1000_PCS_LCTL_FDV_FULL		8
+#define E1000_PCS_LCTL_FSD		0x10
+#define E1000_PCS_LCTL_FORCE_LINK	0x20
+#define E1000_PCS_LCTL_FORCE_FCTRL	0x80
+#define E1000_PCS_LCTL_AN_ENABLE	0x10000
+#define E1000_PCS_LCTL_AN_RESTART	0x20000
+#define E1000_PCS_LCTL_AN_TIMEOUT	0x40000
+#define E1000_ENABLE_SERDES_LOOPBACK	0x0410
+
+#define E1000_PCS_LSTS_LINK_OK		1
+#define E1000_PCS_LSTS_SPEED_100	2
+#define E1000_PCS_LSTS_SPEED_1000	4
+#define E1000_PCS_LSTS_DUPLEX_FULL	8
+#define E1000_PCS_LSTS_SYNK_OK		0x10
+#define E1000_PCS_LSTS_AN_COMPLETE	0x10000
+
+/* Device Status */
+#define E1000_STATUS_FD			0x00000001 /* Duplex 0=half 1=full */
+#define E1000_STATUS_LU			0x00000002 /* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK		0x0000000C /* PCI Function Mask */
+#define E1000_STATUS_FUNC_SHIFT		2
+#define E1000_STATUS_FUNC_1		0x00000004 /* Function 1 */
+#define E1000_STATUS_TXOFF		0x00000010 /* transmission paused */
+#define E1000_STATUS_SPEED_MASK	0x000000C0
+#define E1000_STATUS_SPEED_10		0x00000000 /* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100		0x00000040 /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000		0x00000080 /* Speed 1000Mb/s */
+#define E1000_STATUS_LAN_INIT_DONE	0x00000200 /* Lan Init Compltn by NVM */
+#define E1000_STATUS_PHYRA		0x00000400 /* PHY Reset Asserted */
+#define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000 /* Master request status */
+#define E1000_STATUS_2P5_SKU		0x00001000 /* Val of 2.5GBE SKU strap */
+#define E1000_STATUS_2P5_SKU_OVER	0x00002000 /* Val of 2.5GBE SKU Over */
+
+#define SPEED_10	10
+#define SPEED_100	100
+#define SPEED_1000	1000
+#define SPEED_2500	2500
+#define HALF_DUPLEX	1
+#define FULL_DUPLEX	2
+
+
+#define ADVERTISE_10_HALF		0x0001
+#define ADVERTISE_10_FULL		0x0002
+#define ADVERTISE_100_HALF		0x0004
+#define ADVERTISE_100_FULL		0x0008
+#define ADVERTISE_1000_HALF		0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL		0x0020
+
+/* 1000/H is not supported, nor spec-compliant. */
+#define E1000_ALL_SPEED_DUPLEX	( \
+	ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+	ADVERTISE_100_FULL | ADVERTISE_1000_FULL)
+#define E1000_ALL_NOT_GIG	( \
+	ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+	ADVERTISE_100_FULL)
+#define E1000_ALL_100_SPEED	(ADVERTISE_100_HALF | ADVERTISE_100_FULL)
+#define E1000_ALL_10_SPEED	(ADVERTISE_10_HALF | ADVERTISE_10_FULL)
+#define E1000_ALL_HALF_DUPLEX	(ADVERTISE_10_HALF | ADVERTISE_100_HALF)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT		E1000_ALL_SPEED_DUPLEX
+
+/* LED Control */
+#define E1000_LEDCTL_LED0_MODE_MASK	0x0000000F
+#define E1000_LEDCTL_LED0_MODE_SHIFT	0
+#define E1000_LEDCTL_LED0_IVRT		0x00000040
+#define E1000_LEDCTL_LED0_BLINK		0x00000080
+
+#define E1000_LEDCTL_MODE_LED_ON	0xE
+#define E1000_LEDCTL_MODE_LED_OFF	0xF
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_DTYP_D	0x00100000 /* Data Descriptor */
+#define E1000_TXD_DTYP_C	0x00000000 /* Context Descriptor */
+#define E1000_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP	0x01000000 /* End of Packet */
+#define E1000_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_IC	0x04000000 /* Insert Checksum */
+#define E1000_TXD_CMD_RS	0x08000000 /* Report Status */
+#define E1000_TXD_CMD_RPS	0x10000000 /* Report Packet Sent */
+#define E1000_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define E1000_TXD_CMD_VLE	0x40000000 /* Add VLAN tag */
+#define E1000_TXD_CMD_IDE	0x80000000 /* Enable Tidv register */
+#define E1000_TXD_STAT_DD	0x00000001 /* Descriptor Done */
+#define E1000_TXD_STAT_EC	0x00000002 /* Excess Collisions */
+#define E1000_TXD_STAT_LC	0x00000004 /* Late Collisions */
+#define E1000_TXD_STAT_TU	0x00000008 /* Transmit underrun */
+#define E1000_TXD_CMD_TCP	0x01000000 /* TCP packet */
+#define E1000_TXD_CMD_IP	0x02000000 /* IP packet */
+#define E1000_TXD_CMD_TSE	0x04000000 /* TCP Seg enable */
+#define E1000_TXD_STAT_TC	0x00000004 /* Tx Underrun */
+#define E1000_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define E1000_TCTL_EN		0x00000002 /* enable Tx */
+#define E1000_TCTL_PSP		0x00000008 /* pad short packets */
+#define E1000_TCTL_CT		0x00000ff0 /* collision threshold */
+#define E1000_TCTL_COLD		0x003ff000 /* collision distance */
+#define E1000_TCTL_RTLC		0x01000000 /* Re-transmit on late collision */
+#define E1000_TCTL_MULR		0x10000000 /* Multiple request support */
+
+/* Transmit Arbitration Count */
+#define E1000_TARC0_ENABLE	0x00000400 /* Enable Tx Queue 0 */
+
+/* SerDes Control */
+#define E1000_SCTL_DISABLE_SERDES_LOOPBACK	0x0400
+#define E1000_SCTL_ENABLE_SERDES_LOOPBACK	0x0410
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_IPOFL	0x00000100 /* IPv4 checksum offload */
+#define E1000_RXCSUM_TUOFL	0x00000200 /* TCP / UDP checksum offload */
+#define E1000_RXCSUM_CRCOFL	0x00000800 /* CRC32 offload enable */
+#define E1000_RXCSUM_IPPCSE	0x00001000 /* IP payload checksum enable */
+#define E1000_RXCSUM_PCSD	0x00002000 /* packet checksum disabled */
+
+/* Header split receive */
+#define E1000_RFCTL_NFSW_DIS		0x00000040
+#define E1000_RFCTL_NFSR_DIS		0x00000080
+#define E1000_RFCTL_ACK_DIS		0x00001000
+#define E1000_RFCTL_EXTEN		0x00008000
+#define E1000_RFCTL_IPV6_EX_DIS		0x00010000
+#define E1000_RFCTL_NEW_IPV6_EXT_DIS	0x00020000
+#define E1000_RFCTL_LEF			0x00040000
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD	15
+#define E1000_CT_SHIFT			4
+#define E1000_COLLISION_DISTANCE	63
+#define E1000_COLD_SHIFT		12
+
+/* Default values for the transmit IPG register */
+#define DEFAULT_82543_TIPG_IPGT_FIBER	9
+#define DEFAULT_82543_TIPG_IPGT_COPPER	8
+
+#define E1000_TIPG_IPGT_MASK		0x000003FF
+
+#define DEFAULT_82543_TIPG_IPGR1	8
+#define E1000_TIPG_IPGR1_SHIFT		10
+
+#define DEFAULT_82543_TIPG_IPGR2	6
+#define DEFAULT_80003ES2LAN_TIPG_IPGR2	7
+#define E1000_TIPG_IPGR2_SHIFT		20
+
+/* Ethertype field values */
+#define ETHERNET_IEEE_VLAN_TYPE		0x8100  /* 802.3ac packet */
+
+#define ETHERNET_FCS_SIZE		4
+#define MAX_JUMBO_FRAME_SIZE		0x3F00
+
+/* Extended Configuration Control and Size */
+#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP	0x00000020
+#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE	0x00000001
+#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE	0x00000008
+#define E1000_EXTCNF_CTRL_SWFLAG		0x00000020
+#define E1000_EXTCNF_CTRL_GATE_PHY_CFG		0x00000080
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK	0x00FF0000
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT	16
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK	0x0FFF0000
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT	16
+
+#define E1000_PHY_CTRL_D0A_LPLU			0x00000002
+#define E1000_PHY_CTRL_NOND0A_LPLU		0x00000004
+#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE	0x00000008
+#define E1000_PHY_CTRL_GBE_DISABLE		0x00000040
+
+#define E1000_KABGTXD_BGSQLBIAS			0x00050000
+
+/* PBA constants */
+#define E1000_PBA_8K		0x0008    /* 8KB */
+#define E1000_PBA_10K		0x000A    /* 10KB */
+#define E1000_PBA_12K		0x000C    /* 12KB */
+#define E1000_PBA_14K		0x000E    /* 14KB */
+#define E1000_PBA_16K		0x0010    /* 16KB */
+#define E1000_PBA_18K		0x0012
+#define E1000_PBA_20K		0x0014
+#define E1000_PBA_22K		0x0016
+#define E1000_PBA_24K		0x0018
+#define E1000_PBA_26K		0x001A
+#define E1000_PBA_30K		0x001E
+#define E1000_PBA_32K		0x0020
+#define E1000_PBA_34K		0x0022
+#define E1000_PBA_35K		0x0023
+#define E1000_PBA_38K		0x0026
+#define E1000_PBA_40K		0x0028
+#define E1000_PBA_48K		0x0030    /* 48KB */
+#define E1000_PBA_64K		0x0040    /* 64KB */
+
+#define E1000_PBA_RXA_MASK	0xFFFF
+
+#define E1000_PBS_16K		E1000_PBA_16K
+
+#define IFS_MAX			80
+#define IFS_MIN			40
+#define IFS_RATIO		4
+#define IFS_STEP		10
+#define MIN_NUM_XMITS		1000
+
+/* SW Semaphore Register */
+#define E1000_SWSM_SMBI		0x00000001 /* Driver Semaphore bit */
+#define E1000_SWSM_SWESMBI	0x00000002 /* FW Semaphore bit */
+#define E1000_SWSM_DRV_LOAD	0x00000008 /* Driver Loaded Bit */
+
+#define E1000_SWSM2_LOCK	0x00000002 /* Secondary driver semaphore bit */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW		0x00000001 /* Transmit desc written back */
+#define E1000_ICR_TXQE		0x00000002 /* Transmit Queue empty */
+#define E1000_ICR_LSC		0x00000004 /* Link Status Change */
+#define E1000_ICR_RXSEQ		0x00000008 /* Rx sequence error */
+#define E1000_ICR_RXDMT0	0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO		0x00000040 /* Rx overrun */
+#define E1000_ICR_RXT0		0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_VMMB		0x00000100 /* VM MB event */
+#define E1000_ICR_RXCFG		0x00000400 /* Rx /c/ ordered set */
+#define E1000_ICR_GPI_EN0	0x00000800 /* GP Int 0 */
+#define E1000_ICR_GPI_EN1	0x00001000 /* GP Int 1 */
+#define E1000_ICR_GPI_EN2	0x00002000 /* GP Int 2 */
+#define E1000_ICR_GPI_EN3	0x00004000 /* GP Int 3 */
+#define E1000_ICR_TXD_LOW	0x00008000
+#define E1000_ICR_MNG		0x00040000 /* Manageability event */
+#define E1000_ICR_TS		0x00080000 /* Time Sync Interrupt */
+#define E1000_ICR_DRSTA		0x40000000 /* Device Reset Asserted */
+/* If this bit asserted, the driver should claim the interrupt */
+#define E1000_ICR_INT_ASSERTED	0x80000000
+#define E1000_ICR_DOUTSYNC	0x10000000 /* NIC DMA out of sync */
+#define E1000_ICR_FER		0x00400000 /* Fatal Error */
+
+#define E1000_ICR_THS		0x00800000 /* ICR.THS: Thermal Sensor Event*/
+#define E1000_ICR_MDDET		0x10000000 /* Malicious Driver Detect */
+
+
+/* Extended Interrupt Cause Read */
+#define E1000_EICR_RX_QUEUE0	0x00000001 /* Rx Queue 0 Interrupt */
+#define E1000_EICR_RX_QUEUE1	0x00000002 /* Rx Queue 1 Interrupt */
+#define E1000_EICR_RX_QUEUE2	0x00000004 /* Rx Queue 2 Interrupt */
+#define E1000_EICR_RX_QUEUE3	0x00000008 /* Rx Queue 3 Interrupt */
+#define E1000_EICR_TX_QUEUE0	0x00000100 /* Tx Queue 0 Interrupt */
+#define E1000_EICR_TX_QUEUE1	0x00000200 /* Tx Queue 1 Interrupt */
+#define E1000_EICR_TX_QUEUE2	0x00000400 /* Tx Queue 2 Interrupt */
+#define E1000_EICR_TX_QUEUE3	0x00000800 /* Tx Queue 3 Interrupt */
+#define E1000_EICR_TCP_TIMER	0x40000000 /* TCP Timer */
+#define E1000_EICR_OTHER	0x80000000 /* Interrupt Cause Active */
+/* TCP Timer */
+#define E1000_TCPTIMER_KS	0x00000100 /* KickStart */
+#define E1000_TCPTIMER_COUNT_ENABLE	0x00000200 /* Count Enable */
+#define E1000_TCPTIMER_COUNT_FINISH	0x00000400 /* Count finish */
+#define E1000_TCPTIMER_LOOP	0x00000800 /* Loop */
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register.  Each bit is documented below:
+ *   o RXT0   = Receiver Timer Interrupt (ring 0)
+ *   o TXDW   = Transmit Descriptor Written Back
+ *   o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ *   o RXSEQ  = Receive Sequence Error
+ *   o LSC    = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+	E1000_IMS_RXT0   |    \
+	E1000_IMS_TXDW   |    \
+	E1000_IMS_RXDMT0 |    \
+	E1000_IMS_RXSEQ  |    \
+	E1000_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW		E1000_ICR_TXDW    /* Tx desc written back */
+#define E1000_IMS_TXQE		E1000_ICR_TXQE    /* Transmit Queue empty */
+#define E1000_IMS_LSC		E1000_ICR_LSC     /* Link Status Change */
+#define E1000_IMS_VMMB		E1000_ICR_VMMB    /* Mail box activity */
+#define E1000_IMS_RXSEQ		E1000_ICR_RXSEQ   /* Rx sequence error */
+#define E1000_IMS_RXDMT0	E1000_ICR_RXDMT0  /* Rx desc min. threshold */
+#define E1000_IMS_RXO		E1000_ICR_RXO     /* Rx overrun */
+#define E1000_IMS_RXT0		E1000_ICR_RXT0    /* Rx timer intr */
+#define E1000_IMS_TXD_LOW	E1000_ICR_TXD_LOW
+#define E1000_IMS_TS		E1000_ICR_TS      /* Time Sync Interrupt */
+#define E1000_IMS_DRSTA		E1000_ICR_DRSTA   /* Device Reset Asserted */
+#define E1000_IMS_DOUTSYNC	E1000_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define E1000_IMS_FER		E1000_ICR_FER /* Fatal Error */
+
+#define E1000_IMS_THS		E1000_ICR_THS /* ICR.TS: Thermal Sensor Event*/
+#define E1000_IMS_MDDET		E1000_ICR_MDDET /* Malicious Driver Detect */
+/* Extended Interrupt Mask Set */
+#define E1000_EIMS_RX_QUEUE0	E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */
+#define E1000_EIMS_RX_QUEUE1	E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */
+#define E1000_EIMS_RX_QUEUE2	E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */
+#define E1000_EIMS_RX_QUEUE3	E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */
+#define E1000_EIMS_TX_QUEUE0	E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */
+#define E1000_EIMS_TX_QUEUE1	E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */
+#define E1000_EIMS_TX_QUEUE2	E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */
+#define E1000_EIMS_TX_QUEUE3	E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */
+#define E1000_EIMS_TCP_TIMER	E1000_EICR_TCP_TIMER /* TCP Timer */
+#define E1000_EIMS_OTHER	E1000_EICR_OTHER   /* Interrupt Cause Active */
+
+/* Interrupt Cause Set */
+#define E1000_ICS_LSC		E1000_ICR_LSC       /* Link Status Change */
+#define E1000_ICS_RXSEQ		E1000_ICR_RXSEQ     /* Rx sequence error */
+#define E1000_ICS_RXDMT0	E1000_ICR_RXDMT0    /* Rx desc min. threshold */
+
+/* Extended Interrupt Cause Set */
+#define E1000_EICS_RX_QUEUE0	E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */
+#define E1000_EICS_RX_QUEUE1	E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */
+#define E1000_EICS_RX_QUEUE2	E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */
+#define E1000_EICS_RX_QUEUE3	E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */
+#define E1000_EICS_TX_QUEUE0	E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */
+#define E1000_EICS_TX_QUEUE1	E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */
+#define E1000_EICS_TX_QUEUE2	E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */
+#define E1000_EICS_TX_QUEUE3	E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */
+#define E1000_EICS_TCP_TIMER	E1000_EICR_TCP_TIMER /* TCP Timer */
+#define E1000_EICS_OTHER	E1000_EICR_OTHER   /* Interrupt Cause Active */
+
+#define E1000_EITR_ITR_INT_MASK	0x0000FFFF
+/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
+#define E1000_EITR_CNT_IGNR	0x80000000 /* Don't reset counters on write */
+#define E1000_EITR_INTERVAL 0x00007FFC
+
+/* Transmit Descriptor Control */
+#define E1000_TXDCTL_PTHRESH	0x0000003F /* TXDCTL Prefetch Threshold */
+#define E1000_TXDCTL_HTHRESH	0x00003F00 /* TXDCTL Host Threshold */
+#define E1000_TXDCTL_WTHRESH	0x003F0000 /* TXDCTL Writeback Threshold */
+#define E1000_TXDCTL_GRAN	0x01000000 /* TXDCTL Granularity */
+#define E1000_TXDCTL_FULL_TX_DESC_WB	0x01010000 /* GRAN=1, WTHRESH=1 */
+#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */
+/* Enable the counting of descriptors still to be processed. */
+#define E1000_TXDCTL_COUNT_DESC	0x00400000
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW	0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH	0x00000100
+#define FLOW_CONTROL_TYPE		0x8808
+
+/* 802.1q VLAN Packet Size */
+#define VLAN_TAG_SIZE			4    /* 802.3ac tag (not DMA'd) */
+#define E1000_VLAN_FILTER_TBL_SIZE	128  /* VLAN Filter Table (4096 bits) */
+
+/* Receive Address
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots.  However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define E1000_RAR_ENTRIES	15
+#define E1000_RAH_AV		0x80000000 /* Receive descriptor valid */
+#define E1000_RAL_MAC_ADDR_LEN	4
+#define E1000_RAH_MAC_ADDR_LEN	2
+#define E1000_RAH_QUEUE_MASK_82575	0x000C0000
+#define E1000_RAH_POOL_1	0x00040000
+
+/* Error Codes */
+#define E1000_SUCCESS			0
+#define E1000_ERR_NVM			1
+#define E1000_ERR_PHY			2
+#define E1000_ERR_CONFIG		3
+#define E1000_ERR_PARAM			4
+#define E1000_ERR_MAC_INIT		5
+#define E1000_ERR_PHY_TYPE		6
+#define E1000_ERR_RESET			9
+#define E1000_ERR_MASTER_REQUESTS_PENDING	10
+#define E1000_ERR_HOST_INTERFACE_COMMAND	11
+#define E1000_BLK_PHY_RESET		12
+#define E1000_ERR_SWFW_SYNC		13
+#define E1000_NOT_IMPLEMENTED		14
+#define E1000_ERR_MBX			15
+#define E1000_ERR_INVALID_ARGUMENT	16
+#define E1000_ERR_NO_SPACE		17
+#define E1000_ERR_NVM_PBA_SECTION	18
+#define E1000_ERR_I2C			19
+#define E1000_ERR_INVM_VALUE_NOT_FOUND	20
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define FIBER_LINK_UP_LIMIT		50
+#define COPPER_LINK_UP_LIMIT		10
+#define PHY_AUTO_NEG_LIMIT		45
+#define PHY_FORCE_LIMIT			20
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define MASTER_DISABLE_TIMEOUT		800
+/* Number of milliseconds we wait for PHY configuration done after MAC reset */
+#define PHY_CFG_TIMEOUT			100
+/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */
+#define MDIO_OWNERSHIP_TIMEOUT		10
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT		10
+
+/* Flow Control */
+#define E1000_FCRTH_RTH		0x0000FFF8 /* Mask Bits[15:3] for RTH */
+#define E1000_FCRTL_RTL		0x0000FFF8 /* Mask Bits[15:3] for RTL */
+#define E1000_FCRTL_XONE	0x80000000 /* Enable XON frame transmission */
+
+/* Transmit Configuration Word */
+#define E1000_TXCW_FD		0x00000020 /* TXCW full duplex */
+#define E1000_TXCW_PAUSE	0x00000080 /* TXCW sym pause request */
+#define E1000_TXCW_ASM_DIR	0x00000100 /* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE_MASK	0x00000180 /* TXCW pause request mask */
+#define E1000_TXCW_ANE		0x80000000 /* Auto-neg enable */
+
+/* Receive Configuration Word */
+#define E1000_RXCW_CW		0x0000ffff /* RxConfigWord mask */
+#define E1000_RXCW_IV		0x08000000 /* Receive config invalid */
+#define E1000_RXCW_C		0x20000000 /* Receive config */
+#define E1000_RXCW_SYNCH	0x40000000 /* Receive config synch */
+
+#define E1000_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
+#define E1000_TSYNCTXCTL_ENABLED	0x00000010 /* enable Tx timestamping */
+
+#define E1000_TSYNCRXCTL_VALID		0x00000001 /* Rx timestamp valid */
+#define E1000_TSYNCRXCTL_TYPE_MASK	0x0000000E /* Rx type mask */
+#define E1000_TSYNCRXCTL_TYPE_L2_V2	0x00
+#define E1000_TSYNCRXCTL_TYPE_L4_V1	0x02
+#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define E1000_TSYNCRXCTL_TYPE_ALL	0x08
+#define E1000_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
+#define E1000_TSYNCRXCTL_ENABLED	0x00000010 /* enable Rx timestamping */
+#define E1000_TSYNCRXCTL_SYSCFI		0x00000020 /* Sys clock frequency */
+
+#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK		0x000000FF
+#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE		0x00
+#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE	0x01
+#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE	0x02
+#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE	0x03
+#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE	0x04
+
+#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK		0x00000F00
+#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE		0x0000
+#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE	0x0100
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE	0x0200
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE	0x0300
+#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE	0x0800
+#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE	0x0900
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00
+#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE	0x0B00
+#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE	0x0C00
+#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE	0x0D00
+
+#define E1000_TIMINCA_16NS_SHIFT	24
+#define E1000_TIMINCA_INCPERIOD_SHIFT	24
+#define E1000_TIMINCA_INCVALUE_MASK	0x00FFFFFF
+
+#define E1000_TSICR_TXTS		0x00000002
+#define E1000_TSIM_TXTS			0x00000002
+/* TUPLE Filtering Configuration */
+#define E1000_TTQF_DISABLE_MASK		0xF0008000 /* TTQF Disable Mask */
+#define E1000_TTQF_QUEUE_ENABLE		0x100   /* TTQF Queue Enable Bit */
+#define E1000_TTQF_PROTOCOL_MASK	0xFF    /* TTQF Protocol Mask */
+/* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL SHIFT */
+#define E1000_TTQF_PROTOCOL_TCP		0x0
+/* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
+#define E1000_TTQF_PROTOCOL_UDP		0x1
+/* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */
+#define E1000_TTQF_PROTOCOL_SCTP	0x2
+#define E1000_TTQF_PROTOCOL_SHIFT	5       /* TTQF Protocol Shift */
+#define E1000_TTQF_QUEUE_SHIFT		16      /* TTQF Queue Shfit */
+#define E1000_TTQF_RX_QUEUE_MASK	0x70000 /* TTQF Queue Mask */
+#define E1000_TTQF_MASK_ENABLE		0x10000000 /* TTQF Mask Enable Bit */
+#define E1000_IMIR_CLEAR_MASK		0xF001FFFF /* IMIR Reg Clear Mask */
+#define E1000_IMIR_PORT_BYPASS		0x20000 /* IMIR Port Bypass Bit */
+#define E1000_IMIR_PRIORITY_SHIFT	29 /* IMIR Priority Shift */
+#define E1000_IMIREXT_CLEAR_MASK	0x7FFFF /* IMIREXT Reg Clear Mask */
+
+#define E1000_MDICNFG_EXT_MDIO		0x80000000 /* MDI ext/int destination */
+#define E1000_MDICNFG_COM_MDIO		0x40000000 /* MDI shared w/ lan 0 */
+#define E1000_MDICNFG_PHY_MASK		0x03E00000
+#define E1000_MDICNFG_PHY_SHIFT		21
+
+#define E1000_MEDIA_PORT_COPPER			1
+#define E1000_MEDIA_PORT_OTHER			2
+#define E1000_M88E1112_AUTO_COPPER_SGMII	0x2
+#define E1000_M88E1112_AUTO_COPPER_BASEX	0x3
+#define E1000_M88E1112_STATUS_LINK		0x0004 /* Interface Link Bit */
+#define E1000_M88E1112_MAC_CTRL_1		0x10
+#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK	0x0380 /* Mode Select */
+#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT	7
+#define E1000_M88E1112_PAGE_ADDR		0x16
+#define E1000_M88E1112_STATUS			0x01
+
+#define E1000_THSTAT_LOW_EVENT		0x20000000 /* Low thermal threshold */
+#define E1000_THSTAT_MID_EVENT		0x00200000 /* Mid thermal threshold */
+#define E1000_THSTAT_HIGH_EVENT		0x00002000 /* High thermal threshold */
+#define E1000_THSTAT_PWR_DOWN		0x00000001 /* Power Down Event */
+#define E1000_THSTAT_LINK_THROTTLE	0x00000002 /* Link Spd Throttle Event */
+
+/* I350 EEE defines */
+#define E1000_IPCNFG_EEE_1G_AN		0x00000008 /* IPCNFG EEE Ena 1G AN */
+#define E1000_IPCNFG_EEE_100M_AN	0x00000004 /* IPCNFG EEE Ena 100M AN */
+#define E1000_EEER_TX_LPI_EN		0x00010000 /* EEER Tx LPI Enable */
+#define E1000_EEER_RX_LPI_EN		0x00020000 /* EEER Rx LPI Enable */
+#define E1000_EEER_LPI_FC		0x00040000 /* EEER Ena on Flow Cntrl */
+/* EEE status */
+#define E1000_EEER_EEE_NEG		0x20000000 /* EEE capability nego */
+#define E1000_EEER_RX_LPI_STATUS	0x40000000 /* Rx in LPI state */
+#define E1000_EEER_TX_LPI_STATUS	0x80000000 /* Tx in LPI state */
+#define E1000_EEE_LP_ADV_ADDR_I350	0x040F     /* EEE LP Advertisement */
+#define E1000_M88E1543_PAGE_ADDR	0x16       /* Page Offset Register */
+#define E1000_M88E1543_EEE_CTRL_1	0x0
+#define E1000_M88E1543_EEE_CTRL_1_MS	0x0001     /* EEE Master/Slave */
+#define E1000_EEE_ADV_DEV_I354		7
+#define E1000_EEE_ADV_ADDR_I354		60
+#define E1000_EEE_ADV_100_SUPPORTED	(1 << 1)   /* 100BaseTx EEE Supported */
+#define E1000_EEE_ADV_1000_SUPPORTED	(1 << 2)   /* 1000BaseT EEE Supported */
+#define E1000_PCS_STATUS_DEV_I354	3
+#define E1000_PCS_STATUS_ADDR_I354	1
+#define E1000_PCS_STATUS_RX_LPI_RCVD	0x0400
+#define E1000_PCS_STATUS_TX_LPI_RCVD	0x0800
+#define E1000_EEE_SU_LPI_CLK_STP	0x00800000 /* EEE LPI Clock Stop */
+#define E1000_EEE_LP_ADV_DEV_I210	7          /* EEE LP Adv Device */
+#define E1000_EEE_LP_ADV_ADDR_I210	61         /* EEE LP Adv Register */
+/* PCI Express Control */
+#define E1000_GCR_RXD_NO_SNOOP		0x00000001
+#define E1000_GCR_RXDSCW_NO_SNOOP	0x00000002
+#define E1000_GCR_RXDSCR_NO_SNOOP	0x00000004
+#define E1000_GCR_TXD_NO_SNOOP		0x00000008
+#define E1000_GCR_TXDSCW_NO_SNOOP	0x00000010
+#define E1000_GCR_TXDSCR_NO_SNOOP	0x00000020
+#define E1000_GCR_CMPL_TMOUT_MASK	0x0000F000
+#define E1000_GCR_CMPL_TMOUT_10ms	0x00001000
+#define E1000_GCR_CMPL_TMOUT_RESEND	0x00010000
+#define E1000_GCR_CAP_VER2		0x00040000
+
+#define PCIE_NO_SNOOP_ALL	(E1000_GCR_RXD_NO_SNOOP | \
+				 E1000_GCR_RXDSCW_NO_SNOOP | \
+				 E1000_GCR_RXDSCR_NO_SNOOP | \
+				 E1000_GCR_TXD_NO_SNOOP    | \
+				 E1000_GCR_TXDSCW_NO_SNOOP | \
+				 E1000_GCR_TXDSCR_NO_SNOOP)
+
+#define E1000_MMDAC_FUNC_DATA	0x4000 /* Data, no post increment */
+
+/* mPHY address control and data registers */
+#define E1000_MPHY_ADDR_CTL		0x0024 /* Address Control Reg */
+#define E1000_MPHY_ADDR_CTL_OFFSET_MASK	0xFFFF0000
+#define E1000_MPHY_DATA			0x0E10 /* Data Register */
+
+/* AFE CSR Offset for PCS CLK */
+#define E1000_MPHY_PCS_CLK_REG_OFFSET	0x0004
+/* Override for near end digital loopback. */
+#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN	0x10
+
+/* PHY Control Register */
+#define MII_CR_SPEED_SELECT_MSB	0x0040  /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_COLL_TEST_ENABLE	0x0080  /* Collision test enable */
+#define MII_CR_FULL_DUPLEX	0x0100  /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG	0x0200  /* Restart auto negotiation */
+#define MII_CR_ISOLATE		0x0400  /* Isolate PHY from MII */
+#define MII_CR_POWER_DOWN	0x0800  /* Power down */
+#define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
+#define MII_CR_SPEED_SELECT_LSB	0x2000  /* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_LOOPBACK		0x4000  /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET		0x8000  /* 0 = normal, 1 = PHY reset */
+#define MII_CR_SPEED_1000	0x0040
+#define MII_CR_SPEED_100	0x2000
+#define MII_CR_SPEED_10		0x0000
+
+/* PHY Status Register */
+#define MII_SR_EXTENDED_CAPS	0x0001 /* Extended register capabilities */
+#define MII_SR_JABBER_DETECT	0x0002 /* Jabber Detected */
+#define MII_SR_LINK_STATUS	0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_CAPS	0x0008 /* Auto Neg Capable */
+#define MII_SR_REMOTE_FAULT	0x0010 /* Remote Fault Detect */
+#define MII_SR_AUTONEG_COMPLETE	0x0020 /* Auto Neg Complete */
+#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */
+#define MII_SR_EXTENDED_STATUS	0x0100 /* Ext. status info in Reg 0x0F */
+#define MII_SR_100T2_HD_CAPS	0x0200 /* 100T2 Half Duplex Capable */
+#define MII_SR_100T2_FD_CAPS	0x0400 /* 100T2 Full Duplex Capable */
+#define MII_SR_10T_HD_CAPS	0x0800 /* 10T   Half Duplex Capable */
+#define MII_SR_10T_FD_CAPS	0x1000 /* 10T   Full Duplex Capable */
+#define MII_SR_100X_HD_CAPS	0x2000 /* 100X  Half Duplex Capable */
+#define MII_SR_100X_FD_CAPS	0x4000 /* 100X  Full Duplex Capable */
+#define MII_SR_100T4_CAPS	0x8000 /* 100T4 Capable */
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_SELECTOR_FIELD	0x0001   /* indicates IEEE 802.3 CSMA/CD */
+#define NWAY_AR_10T_HD_CAPS	0x0020   /* 10T   Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS	0x0040   /* 10T   Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS	0x0080   /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS	0x0100   /* 100TX Full Duplex Capable */
+#define NWAY_AR_100T4_CAPS	0x0200   /* 100T4 Capable */
+#define NWAY_AR_PAUSE		0x0400   /* Pause operation desired */
+#define NWAY_AR_ASM_DIR		0x0800   /* Asymmetric Pause Direction bit */
+#define NWAY_AR_REMOTE_FAULT	0x2000   /* Remote Fault detected */
+#define NWAY_AR_NEXT_PAGE	0x8000   /* Next Page ability supported */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_SELECTOR_FIELD	0x0000 /* LP protocol selector field */
+#define NWAY_LPAR_10T_HD_CAPS		0x0020 /* LP 10T Half Dplx Capable */
+#define NWAY_LPAR_10T_FD_CAPS		0x0040 /* LP 10T Full Dplx Capable */
+#define NWAY_LPAR_100TX_HD_CAPS		0x0080 /* LP 100TX Half Dplx Capable */
+#define NWAY_LPAR_100TX_FD_CAPS		0x0100 /* LP 100TX Full Dplx Capable */
+#define NWAY_LPAR_100T4_CAPS		0x0200 /* LP is 100T4 Capable */
+#define NWAY_LPAR_PAUSE			0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR		0x0800 /* LP Asym Pause Direction bit */
+#define NWAY_LPAR_REMOTE_FAULT		0x2000 /* LP detected Remote Fault */
+#define NWAY_LPAR_ACKNOWLEDGE		0x4000 /* LP rx'd link code word */
+#define NWAY_LPAR_NEXT_PAGE		0x8000 /* Next Page ability supported */
+
+/* Autoneg Expansion Register */
+#define NWAY_ER_LP_NWAY_CAPS		0x0001 /* LP has Auto Neg Capability */
+#define NWAY_ER_PAGE_RXD		0x0002 /* LP 10T Half Dplx Capable */
+#define NWAY_ER_NEXT_PAGE_CAPS		0x0004 /* LP 10T Full Dplx Capable */
+#define NWAY_ER_LP_NEXT_PAGE_CAPS	0x0008 /* LP 100TX Half Dplx Capable */
+#define NWAY_ER_PAR_DETECT_FAULT	0x0010 /* LP 100TX Full Dplx Capable */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE	0x0080 /* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS	0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS	0x0200 /* Advertise 1000T FD capability  */
+/* 1=Repeater/switch device port 0=DTE device */
+#define CR_1000T_REPEATER_DTE	0x0400
+/* 1=Configure PHY as Master 0=Configure PHY as Slave */
+#define CR_1000T_MS_VALUE	0x0800
+/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */
+#define CR_1000T_MS_ENABLE	0x1000
+#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */
+#define CR_1000T_TEST_MODE_1	0x2000 /* Transmit Waveform test */
+#define CR_1000T_TEST_MODE_2	0x4000 /* Master Transmit Jitter test */
+#define CR_1000T_TEST_MODE_3	0x6000 /* Slave Transmit Jitter test */
+#define CR_1000T_TEST_MODE_4	0x8000 /* Transmitter Distortion test */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_IDLE_ERROR_CNT		0x00FF /* Num idle err since last rd */
+#define SR_1000T_ASYM_PAUSE_DIR		0x0100 /* LP asym pause direction bit */
+#define SR_1000T_LP_HD_CAPS		0x0400 /* LP is 1000T HD capable */
+#define SR_1000T_LP_FD_CAPS		0x0800 /* LP is 1000T FD capable */
+#define SR_1000T_REMOTE_RX_STATUS	0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS	0x2000 /* Local receiver OK */
+#define SR_1000T_MS_CONFIG_RES		0x4000 /* 1=Local Tx Master, 0=Slave */
+#define SR_1000T_MS_CONFIG_FAULT	0x8000 /* Master/Slave config fault */
+
+#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT	5
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CONTROL		0x00 /* Control Register */
+#define PHY_STATUS		0x01 /* Status Register */
+#define PHY_ID1			0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2			0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV		0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY		0x05 /* Link Partner Ability (Base Page) */
+#define PHY_AUTONEG_EXP		0x06 /* Autoneg Expansion Reg */
+#define PHY_NEXT_PAGE_TX	0x07 /* Next Page Tx */
+#define PHY_LP_NEXT_PAGE	0x08 /* Link Partner Next Page */
+#define PHY_1000T_CTRL		0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS	0x0A /* 1000Base-T Status Reg */
+#define PHY_EXT_STATUS		0x0F /* Extended Status Reg */
+
+#define PHY_CONTROL_LB		0x4000 /* PHY Loopback bit */
+
+/* NVM Control */
+#define E1000_EECD_SK		0x00000001 /* NVM Clock */
+#define E1000_EECD_CS		0x00000002 /* NVM Chip Select */
+#define E1000_EECD_DI		0x00000004 /* NVM Data In */
+#define E1000_EECD_DO		0x00000008 /* NVM Data Out */
+#define E1000_EECD_REQ		0x00000040 /* NVM Access Request */
+#define E1000_EECD_GNT		0x00000080 /* NVM Access Grant */
+#define E1000_EECD_PRES		0x00000100 /* NVM Present */
+#define E1000_EECD_SIZE		0x00000200 /* NVM Size (0=64 word 1=256 word) */
+#define E1000_EECD_BLOCKED	0x00008000 /* Bit banging access blocked flag */
+#define E1000_EECD_ABORT	0x00010000 /* NVM operation aborted flag */
+#define E1000_EECD_TIMEOUT	0x00020000 /* NVM read operation timeout flag */
+#define E1000_EECD_ERROR_CLR	0x00040000 /* NVM error status clear bit */
+/* NVM Addressing bits based on type 0=small, 1=large */
+#define E1000_EECD_ADDR_BITS	0x00000400
+#define E1000_NVM_GRANT_ATTEMPTS	1000 /* NVM # attempts to gain grant */
+#define E1000_EECD_AUTO_RD		0x00000200  /* NVM Auto Read done */
+#define E1000_EECD_SIZE_EX_MASK		0x00007800  /* NVM Size */
+#define E1000_EECD_SIZE_EX_SHIFT	11
+#define E1000_EECD_FLUPD		0x00080000 /* Update FLASH */
+#define E1000_EECD_AUPDEN		0x00100000 /* Ena Auto FLASH update */
+#define E1000_EECD_SEC1VAL		0x00400000 /* Sector One Valid */
+#define E1000_EECD_SEC1VAL_VALID_MASK	(E1000_EECD_AUTO_RD | E1000_EECD_PRES)
+#define E1000_EECD_FLUPD_I210		0x00800000 /* Update FLASH */
+#define E1000_EECD_FLUDONE_I210		0x04000000 /* Update FLASH done */
+#define E1000_EECD_FLASH_DETECTED_I210	0x00080000 /* FLASH detected */
+#define E1000_EECD_SEC1VAL_I210		0x02000000 /* Sector One Valid */
+#define E1000_FLUDONE_ATTEMPTS		20000
+#define E1000_EERD_EEWR_MAX_COUNT	512 /* buffered EEPROM words rw */
+#define E1000_I210_FIFO_SEL_RX		0x00
+#define E1000_I210_FIFO_SEL_TX_QAV(_i)	(0x02 + (_i))
+#define E1000_I210_FIFO_SEL_TX_LEGACY	E1000_I210_FIFO_SEL_TX_QAV(0)
+#define E1000_I210_FIFO_SEL_BMC2OS_TX	0x06
+#define E1000_I210_FIFO_SEL_BMC2OS_RX	0x01
+
+#define E1000_I210_FLASH_SECTOR_SIZE	0x1000 /* 4KB FLASH sector unit size */
+/* Secure FLASH mode requires removing MSb */
+#define E1000_I210_FW_PTR_MASK		0x7FFF
+/* Firmware code revision field word offset*/
+#define E1000_I210_FW_VER_OFFSET	328
+
+#define E1000_NVM_RW_REG_DATA	16  /* Offset to data in NVM read/write regs */
+#define E1000_NVM_RW_REG_DONE	2   /* Offset to READ/WRITE done bit */
+#define E1000_NVM_RW_REG_START	1   /* Start operation */
+#define E1000_NVM_RW_ADDR_SHIFT	2   /* Shift to the address bits */
+#define E1000_NVM_POLL_WRITE	1   /* Flag for polling for write complete */
+#define E1000_NVM_POLL_READ	0   /* Flag for polling for read complete */
+#define E1000_FLASH_UPDATES	2000
+
+/* NVM Word Offsets */
+#define NVM_COMPAT			0x0003
+#define NVM_ID_LED_SETTINGS		0x0004
+#define NVM_VERSION			0x0005
+#define E1000_I210_NVM_FW_MODULE_PTR	0x0010
+#define E1000_I350_NVM_FW_MODULE_PTR	0x0051
+#define NVM_FUTURE_INIT_WORD1		0x0019
+#define NVM_ETRACK_WORD			0x0042
+#define NVM_ETRACK_HIWORD		0x0043
+#define NVM_COMB_VER_OFF		0x0083
+#define NVM_COMB_VER_PTR		0x003d
+
+/* NVM version defines */
+#define NVM_MAJOR_MASK			0xF000
+#define NVM_MINOR_MASK			0x0FF0
+#define NVM_IMAGE_ID_MASK		0x000F
+#define NVM_COMB_VER_MASK		0x00FF
+#define NVM_MAJOR_SHIFT			12
+#define NVM_MINOR_SHIFT			4
+#define NVM_COMB_VER_SHFT		8
+#define NVM_VER_INVALID			0xFFFF
+#define NVM_ETRACK_SHIFT		16
+#define NVM_ETRACK_VALID		0x8000
+#define NVM_NEW_DEC_MASK		0x0F00
+#define NVM_HEX_CONV			16
+#define NVM_HEX_TENS			10
+
+/* FW version defines */
+/* Offset of "Loader patch ptr" in Firmware Header */
+#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET	0x01
+/* Patch generation hour & minutes */
+#define E1000_I350_NVM_FW_VER_WORD1_OFFSET		0x04
+/* Patch generation month & day */
+#define E1000_I350_NVM_FW_VER_WORD2_OFFSET		0x05
+/* Patch generation year */
+#define E1000_I350_NVM_FW_VER_WORD3_OFFSET		0x06
+/* Patch major & minor numbers */
+#define E1000_I350_NVM_FW_VER_WORD4_OFFSET		0x07
+
+#define NVM_MAC_ADDR			0x0000
+#define NVM_SUB_DEV_ID			0x000B
+#define NVM_SUB_VEN_ID			0x000C
+#define NVM_DEV_ID			0x000D
+#define NVM_VEN_ID			0x000E
+#define NVM_INIT_CTRL_2			0x000F
+#define NVM_INIT_CTRL_4			0x0013
+#define NVM_LED_1_CFG			0x001C
+#define NVM_LED_0_2_CFG			0x001F
+
+#define NVM_COMPAT_VALID_CSUM		0x0001
+#define NVM_FUTURE_INIT_WORD1_VALID_CSUM	0x0040
+
+#define NVM_ETS_CFG			0x003E
+#define NVM_ETS_LTHRES_DELTA_MASK	0x07C0
+#define NVM_ETS_LTHRES_DELTA_SHIFT	6
+#define NVM_ETS_TYPE_MASK		0x0038
+#define NVM_ETS_TYPE_SHIFT		3
+#define NVM_ETS_TYPE_EMC		0x000
+#define NVM_ETS_NUM_SENSORS_MASK	0x0007
+#define NVM_ETS_DATA_LOC_MASK		0x3C00
+#define NVM_ETS_DATA_LOC_SHIFT		10
+#define NVM_ETS_DATA_INDEX_MASK		0x0300
+#define NVM_ETS_DATA_INDEX_SHIFT	8
+#define NVM_ETS_DATA_HTHRESH_MASK	0x00FF
+#define NVM_INIT_CONTROL2_REG		0x000F
+#define NVM_INIT_CONTROL3_PORT_B	0x0014
+#define NVM_INIT_3GIO_3			0x001A
+#define NVM_SWDEF_PINS_CTRL_PORT_0	0x0020
+#define NVM_INIT_CONTROL3_PORT_A	0x0024
+#define NVM_CFG				0x0012
+#define NVM_ALT_MAC_ADDR_PTR		0x0037
+#define NVM_CHECKSUM_REG		0x003F
+#define NVM_COMPATIBILITY_REG_3		0x0003
+#define NVM_COMPATIBILITY_BIT_MASK	0x8000
+
+#define E1000_NVM_CFG_DONE_PORT_0	0x040000 /* MNG config cycle done */
+#define E1000_NVM_CFG_DONE_PORT_1	0x080000 /* ...for second port */
+#define E1000_NVM_CFG_DONE_PORT_2	0x100000 /* ...for third port */
+#define E1000_NVM_CFG_DONE_PORT_3	0x200000 /* ...for fourth port */
+
+#define NVM_82580_LAN_FUNC_OFFSET(a)	((a) ? (0x40 + (0x40 * (a))) : 0)
+
+/* Mask bits for fields in Word 0x24 of the NVM */
+#define NVM_WORD24_COM_MDIO		0x0008 /* MDIO interface shared */
+#define NVM_WORD24_EXT_MDIO		0x0004 /* MDIO accesses routed extrnl */
+/* Offset of Link Mode bits for 82575/82576 */
+#define NVM_WORD24_LNK_MODE_OFFSET	8
+/* Offset of Link Mode bits for 82580 up */
+#define NVM_WORD24_82580_LNK_MODE_OFFSET	4
+
+
+/* Mask bits for fields in Word 0x0f of the NVM */
+#define NVM_WORD0F_PAUSE_MASK		0x3000
+#define NVM_WORD0F_PAUSE		0x1000
+#define NVM_WORD0F_ASM_DIR		0x2000
+
+/* Mask bits for fields in Word 0x1a of the NVM */
+#define NVM_WORD1A_ASPM_MASK		0x000C
+
+/* Mask bits for fields in Word 0x03 of the EEPROM */
+#define NVM_COMPAT_LOM			0x0800
+
+/* length of string needed to store PBA number */
+#define E1000_PBANUM_LENGTH		11
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM				0xBABA
+
+/* PBA (printed board assembly) number words */
+#define NVM_PBA_OFFSET_0		8
+#define NVM_PBA_OFFSET_1		9
+#define NVM_PBA_PTR_GUARD		0xFAFA
+#define NVM_RESERVED_WORD		0xFFFF
+#define NVM_WORD_SIZE_BASE_SHIFT	6
+
+/* NVM Commands - SPI */
+#define NVM_MAX_RETRY_SPI	5000 /* Max wait of 5ms, for RDY signal */
+#define NVM_READ_OPCODE_SPI	0x03 /* NVM read opcode */
+#define NVM_WRITE_OPCODE_SPI	0x02 /* NVM write opcode */
+#define NVM_A8_OPCODE_SPI	0x08 /* opcode bit-3 = address bit-8 */
+#define NVM_WREN_OPCODE_SPI	0x06 /* NVM set Write Enable latch */
+#define NVM_RDSR_OPCODE_SPI	0x05 /* NVM read Status register */
+
+/* SPI NVM Status Register */
+#define NVM_STATUS_RDY_SPI	0x01
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000	0x0000
+#define ID_LED_RESERVED_FFFF	0xFFFF
+#define ID_LED_DEFAULT		((ID_LED_OFF1_ON2  << 12) | \
+				 (ID_LED_OFF1_OFF2 <<  8) | \
+				 (ID_LED_DEF1_DEF2 <<  4) | \
+				 (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2	0x1
+#define ID_LED_DEF1_ON2		0x2
+#define ID_LED_DEF1_OFF2	0x3
+#define ID_LED_ON1_DEF2		0x4
+#define ID_LED_ON1_ON2		0x5
+#define ID_LED_ON1_OFF2		0x6
+#define ID_LED_OFF1_DEF2	0x7
+#define ID_LED_OFF1_ON2		0x8
+#define ID_LED_OFF1_OFF2	0x9
+
+#define IGP_ACTIVITY_LED_MASK	0xFFFFF0FF
+#define IGP_ACTIVITY_LED_ENABLE	0x0300
+#define IGP_LED3_MODE		0x07000000
+
+/* PCI/PCI-X/PCI-EX Config space */
+#define PCI_HEADER_TYPE_REGISTER	0x0E
+#define PCIE_LINK_STATUS		0x12
+#define PCIE_DEVICE_CONTROL2		0x28
+
+#define PCI_HEADER_TYPE_MULTIFUNC	0x80
+#define PCIE_LINK_WIDTH_MASK		0x3F0
+#define PCIE_LINK_WIDTH_SHIFT		4
+#define PCIE_LINK_SPEED_MASK		0x0F
+#define PCIE_LINK_SPEED_2500		0x01
+#define PCIE_LINK_SPEED_5000		0x02
+#define PCIE_DEVICE_CONTROL2_16ms	0x0005
+
+#ifndef ETH_ADDR_LEN
+#define ETH_ADDR_LEN			6
+#endif
+
+#define PHY_REVISION_MASK		0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS		0x1F  /* 5 bit address bus (0-0x1F) */
+#define MAX_PHY_MULTI_PAGE_REG		0xF
+
+/* Bit definitions for valid PHY IDs.
+ * I = Integrated
+ * E = External
+ */
+#define M88E1000_E_PHY_ID	0x01410C50
+#define M88E1000_I_PHY_ID	0x01410C30
+#define M88E1011_I_PHY_ID	0x01410C20
+#define IGP01E1000_I_PHY_ID	0x02A80380
+#define M88E1111_I_PHY_ID	0x01410CC0
+#define M88E1543_E_PHY_ID	0x01410EA0
+#define M88E1112_E_PHY_ID	0x01410C90
+#define I347AT4_E_PHY_ID	0x01410DC0
+#define M88E1340M_E_PHY_ID	0x01410DF0
+#define GG82563_E_PHY_ID	0x01410CA0
+#define IGP03E1000_E_PHY_ID	0x02A80390
+#define IFE_E_PHY_ID		0x02A80330
+#define IFE_PLUS_E_PHY_ID	0x02A80320
+#define IFE_C_E_PHY_ID		0x02A80310
+#define I82580_I_PHY_ID		0x015403A0
+#define I350_I_PHY_ID		0x015403B0
+#define I210_I_PHY_ID		0x01410C00
+#define IGP04E1000_E_PHY_ID	0x02A80391
+#define M88_VENDOR		0x0141
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL		0x10  /* PHY Specific Control Reg */
+#define M88E1000_PHY_SPEC_STATUS	0x11  /* PHY Specific Status Reg */
+#define M88E1000_EXT_PHY_SPEC_CTRL	0x14  /* Extended PHY Specific Cntrl */
+#define M88E1000_RX_ERR_CNTR		0x15  /* Receive Error Counter */
+
+#define M88E1000_PHY_PAGE_SELECT	0x1D  /* Reg 29 for pg number setting */
+#define M88E1000_PHY_GEN_CONTROL	0x1E  /* meaning depends on reg 29 */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_POLARITY_REVERSAL	0x0002 /* 1=Polarity Reverse enabled */
+/* MDI Crossover Mode bits 6:5 Manual MDI configuration */
+#define M88E1000_PSCR_MDI_MANUAL_MODE	0x0000
+#define M88E1000_PSCR_MDIX_MANUAL_MODE	0x0020  /* Manual MDIX configuration */
+/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */
+#define M88E1000_PSCR_AUTO_X_1000T	0x0040
+/* Auto crossover enabled all speeds */
+#define M88E1000_PSCR_AUTO_X_MODE	0x0060
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX	0x0800 /* 1=Assert CRS on Tx */
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_REV_POLARITY	0x0002 /* 1=Polarity reversed */
+#define M88E1000_PSSR_DOWNSHIFT		0x0020 /* 1=Downshifted */
+#define M88E1000_PSSR_MDIX		0x0040 /* 1=MDIX; 0=MDI */
+/* 0 = <50M
+ * 1 = 50-80M
+ * 2 = 80-110M
+ * 3 = 110-140M
+ * 4 = >140M
+ */
+#define M88E1000_PSSR_CABLE_LENGTH	0x0380
+#define M88E1000_PSSR_LINK		0x0400 /* 1=Link up, 0=Link down */
+#define M88E1000_PSSR_SPD_DPLX_RESOLVED	0x0800 /* 1=Speed & Duplex resolved */
+#define M88E1000_PSSR_SPEED		0xC000 /* Speed, bits 14:15 */
+#define M88E1000_PSSR_1000MBS		0x8000 /* 10=1000Mbs */
+
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT	7
+
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master
+ */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK	0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X	0x0000
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave
+ */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK	0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X	0x0100
+#define M88E1000_EPSCR_TX_CLK_25	0x0070 /* 25  MHz TX_CLK */
+
+/* Intel I347AT4 Registers */
+#define I347AT4_PCDL		0x10 /* PHY Cable Diagnostics Length */
+#define I347AT4_PCDC		0x15 /* PHY Cable Diagnostics Control */
+#define I347AT4_PAGE_SELECT	0x16
+
+/* I347AT4 Extended PHY Specific Control Register */
+
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master
+ */
+#define I347AT4_PSCR_DOWNSHIFT_ENABLE	0x0800
+#define I347AT4_PSCR_DOWNSHIFT_MASK	0x7000
+#define I347AT4_PSCR_DOWNSHIFT_1X	0x0000
+#define I347AT4_PSCR_DOWNSHIFT_2X	0x1000
+#define I347AT4_PSCR_DOWNSHIFT_3X	0x2000
+#define I347AT4_PSCR_DOWNSHIFT_4X	0x3000
+#define I347AT4_PSCR_DOWNSHIFT_5X	0x4000
+#define I347AT4_PSCR_DOWNSHIFT_6X	0x5000
+#define I347AT4_PSCR_DOWNSHIFT_7X	0x6000
+#define I347AT4_PSCR_DOWNSHIFT_8X	0x7000
+
+/* I347AT4 PHY Cable Diagnostics Control */
+#define I347AT4_PCDC_CABLE_LENGTH_UNIT	0x0400 /* 0=cm 1=meters */
+
+/* M88E1112 only registers */
+#define M88E1112_VCT_DSP_DISTANCE	0x001A
+
+/* M88EC018 Rev 2 specific DownShift settings */
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK	0x0E00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X	0x0800
+
+/* Bits...
+ * 15-5: page
+ * 4-0: register offset
+ */
+#define GG82563_PAGE_SHIFT	5
+#define GG82563_REG(page, reg)	\
+	(((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS))
+#define GG82563_MIN_ALT_REG	30
+
+/* GG82563 Specific Registers */
+#define GG82563_PHY_SPEC_CTRL		GG82563_REG(0, 16) /* PHY Spec Cntrl */
+#define GG82563_PHY_PAGE_SELECT		GG82563_REG(0, 22) /* Page Select */
+#define GG82563_PHY_SPEC_CTRL_2		GG82563_REG(0, 26) /* PHY Spec Cntrl2 */
+#define GG82563_PHY_PAGE_SELECT_ALT	GG82563_REG(0, 29) /* Alt Page Select */
+
+/* MAC Specific Control Register */
+#define GG82563_PHY_MAC_SPEC_CTRL	GG82563_REG(2, 21)
+
+#define GG82563_PHY_DSP_DISTANCE	GG82563_REG(5, 26) /* DSP Distance */
+
+/* Page 193 - Port Control Registers */
+/* Kumeran Mode Control */
+#define GG82563_PHY_KMRN_MODE_CTRL	GG82563_REG(193, 16)
+#define GG82563_PHY_PWR_MGMT_CTRL	GG82563_REG(193, 20) /* Pwr Mgt Ctrl */
+
+/* Page 194 - KMRN Registers */
+#define GG82563_PHY_INBAND_CTRL		GG82563_REG(194, 18) /* Inband Ctrl */
+
+/* MDI Control */
+#define E1000_MDIC_REG_MASK	0x001F0000
+#define E1000_MDIC_REG_SHIFT	16
+#define E1000_MDIC_PHY_MASK	0x03E00000
+#define E1000_MDIC_PHY_SHIFT	21
+#define E1000_MDIC_OP_WRITE	0x04000000
+#define E1000_MDIC_OP_READ	0x08000000
+#define E1000_MDIC_READY	0x10000000
+#define E1000_MDIC_ERROR	0x40000000
+#define E1000_MDIC_DEST		0x80000000
+
+/* SerDes Control */
+#define E1000_GEN_CTL_READY		0x80000000
+#define E1000_GEN_CTL_ADDRESS_SHIFT	8
+#define E1000_GEN_POLL_TIMEOUT		640
+
+/* LinkSec register fields */
+#define E1000_LSECTXCAP_SUM_MASK	0x00FF0000
+#define E1000_LSECTXCAP_SUM_SHIFT	16
+#define E1000_LSECRXCAP_SUM_MASK	0x00FF0000
+#define E1000_LSECRXCAP_SUM_SHIFT	16
+
+#define E1000_LSECTXCTRL_EN_MASK	0x00000003
+#define E1000_LSECTXCTRL_DISABLE	0x0
+#define E1000_LSECTXCTRL_AUTH		0x1
+#define E1000_LSECTXCTRL_AUTH_ENCRYPT	0x2
+#define E1000_LSECTXCTRL_AISCI		0x00000020
+#define E1000_LSECTXCTRL_PNTHRSH_MASK	0xFFFFFF00
+#define E1000_LSECTXCTRL_RSV_MASK	0x000000D8
+
+#define E1000_LSECRXCTRL_EN_MASK	0x0000000C
+#define E1000_LSECRXCTRL_EN_SHIFT	2
+#define E1000_LSECRXCTRL_DISABLE	0x0
+#define E1000_LSECRXCTRL_CHECK		0x1
+#define E1000_LSECRXCTRL_STRICT		0x2
+#define E1000_LSECRXCTRL_DROP		0x3
+#define E1000_LSECRXCTRL_PLSH		0x00000040
+#define E1000_LSECRXCTRL_RP		0x00000080
+#define E1000_LSECRXCTRL_RSV_MASK	0xFFFFFF33
+
+/* Tx Rate-Scheduler Config fields */
+#define E1000_RTTBCNRC_RS_ENA		0x80000000
+#define E1000_RTTBCNRC_RF_DEC_MASK	0x00003FFF
+#define E1000_RTTBCNRC_RF_INT_SHIFT	14
+#define E1000_RTTBCNRC_RF_INT_MASK	\
+	(E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT)
+
+/* DMA Coalescing register fields */
+/* DMA Coalescing Watchdog Timer */
+#define E1000_DMACR_DMACWT_MASK		0x00003FFF
+/* DMA Coalescing Rx Threshold */
+#define E1000_DMACR_DMACTHR_MASK	0x00FF0000
+#define E1000_DMACR_DMACTHR_SHIFT	16
+/* Lx when no PCIe transactions */
+#define E1000_DMACR_DMAC_LX_MASK	0x30000000
+#define E1000_DMACR_DMAC_LX_SHIFT	28
+#define E1000_DMACR_DMAC_EN		0x80000000 /* Enable DMA Coalescing */
+/* DMA Coalescing BMC-to-OS Watchdog Enable */
+#define E1000_DMACR_DC_BMC2OSW_EN	0x00008000
+
+/* DMA Coalescing Transmit Threshold */
+#define E1000_DMCTXTH_DMCTTHR_MASK	0x00000FFF
+
+#define E1000_DMCTLX_TTLX_MASK		0x00000FFF /* Time to LX request */
+
+/* Rx Traffic Rate Threshold */
+#define E1000_DMCRTRH_UTRESH_MASK	0x0007FFFF
+/* Rx packet rate in current window */
+#define E1000_DMCRTRH_LRPRCW		0x80000000
+
+/* DMA Coal Rx Traffic Current Count */
+#define E1000_DMCCNT_CCOUNT_MASK	0x01FFFFFF
+
+/* Flow ctrl Rx Threshold High val */
+#define E1000_FCRTC_RTH_COAL_MASK	0x0003FFF0
+#define E1000_FCRTC_RTH_COAL_SHIFT	4
+/* Lx power decision based on DMA coal */
+#define E1000_PCIEMISC_LX_DECISION	0x00000080
+
+#define E1000_RXPBS_CFG_TS_EN		0x80000000 /* Timestamp in Rx buffer */
+#define E1000_RXPBS_SIZE_I210_MASK	0x0000003F /* Rx packet buffer size */
+#define E1000_TXPB0S_SIZE_I210_MASK	0x0000003F /* Tx packet buffer 0 size */
+
+/* Proxy Filter Control */
+#define E1000_PROXYFC_D0		0x00000001 /* Enable offload in D0 */
+#define E1000_PROXYFC_EX		0x00000004 /* Directed exact proxy */
+#define E1000_PROXYFC_MC		0x00000008 /* Directed MC Proxy */
+#define E1000_PROXYFC_BC		0x00000010 /* Broadcast Proxy Enable */
+#define E1000_PROXYFC_ARP_DIRECTED	0x00000020 /* Directed ARP Proxy Ena */
+#define E1000_PROXYFC_IPV4		0x00000040 /* Directed IPv4 Enable */
+#define E1000_PROXYFC_IPV6		0x00000080 /* Directed IPv6 Enable */
+#define E1000_PROXYFC_NS		0x00000200 /* IPv6 Neighbor Solicitation */
+#define E1000_PROXYFC_ARP		0x00000800 /* ARP Request Proxy Ena */
+/* Proxy Status */
+#define E1000_PROXYS_CLEAR		0xFFFFFFFF /* Clear */
+
+/* Firmware Status */
+#define E1000_FWSTS_FWRI		0x80000000 /* FW Reset Indication */
+/* VF Control */
+#define E1000_VTCTRL_RST		0x04000000 /* Reset VF */
+
+#define E1000_STATUS_LAN_ID_MASK	0x00000000C /* Mask for Lan ID field */
+/* Lan ID bit field offset in status register */
+#define E1000_STATUS_LAN_ID_OFFSET	2
+#define E1000_VFTA_ENTRIES		128
+#ifndef E1000_UNUSEDARG
+#define E1000_UNUSEDARG
+#endif /* E1000_UNUSEDARG */
+#endif /* _E1000_DEFINES_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
new file mode 100644
index 00000000..347cef71
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
@@ -0,0 +1,793 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_HW_H_
+#define _E1000_HW_H_
+
+#include "e1000_osdep.h"
+#include "e1000_regs.h"
+#include "e1000_defines.h"
+
+struct e1000_hw;
+
+#define E1000_DEV_ID_82576			0x10C9
+#define E1000_DEV_ID_82576_FIBER		0x10E6
+#define E1000_DEV_ID_82576_SERDES		0x10E7
+#define E1000_DEV_ID_82576_QUAD_COPPER		0x10E8
+#define E1000_DEV_ID_82576_QUAD_COPPER_ET2	0x1526
+#define E1000_DEV_ID_82576_NS			0x150A
+#define E1000_DEV_ID_82576_NS_SERDES		0x1518
+#define E1000_DEV_ID_82576_SERDES_QUAD		0x150D
+#define E1000_DEV_ID_82575EB_COPPER		0x10A7
+#define E1000_DEV_ID_82575EB_FIBER_SERDES	0x10A9
+#define E1000_DEV_ID_82575GB_QUAD_COPPER	0x10D6
+#define E1000_DEV_ID_82580_COPPER		0x150E
+#define E1000_DEV_ID_82580_FIBER		0x150F
+#define E1000_DEV_ID_82580_SERDES		0x1510
+#define E1000_DEV_ID_82580_SGMII		0x1511
+#define E1000_DEV_ID_82580_COPPER_DUAL		0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER		0x1527
+#define E1000_DEV_ID_I350_COPPER		0x1521
+#define E1000_DEV_ID_I350_FIBER			0x1522
+#define E1000_DEV_ID_I350_SERDES		0x1523
+#define E1000_DEV_ID_I350_SGMII			0x1524
+#define E1000_DEV_ID_I350_DA4			0x1546
+#define E1000_DEV_ID_I210_COPPER		0x1533
+#define E1000_DEV_ID_I210_COPPER_OEM1		0x1534
+#define E1000_DEV_ID_I210_COPPER_IT		0x1535
+#define E1000_DEV_ID_I210_FIBER			0x1536
+#define E1000_DEV_ID_I210_SERDES		0x1537
+#define E1000_DEV_ID_I210_SGMII			0x1538
+#define E1000_DEV_ID_I210_COPPER_FLASHLESS	0x157B
+#define E1000_DEV_ID_I210_SERDES_FLASHLESS	0x157C
+#define E1000_DEV_ID_I211_COPPER		0x1539
+#define E1000_DEV_ID_I354_BACKPLANE_1GBPS	0x1F40
+#define E1000_DEV_ID_I354_SGMII			0x1F41
+#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS	0x1F45
+#define E1000_DEV_ID_DH89XXCC_SGMII		0x0438
+#define E1000_DEV_ID_DH89XXCC_SERDES		0x043A
+#define E1000_DEV_ID_DH89XXCC_BACKPLANE		0x043C
+#define E1000_DEV_ID_DH89XXCC_SFP		0x0440
+
+#define E1000_REVISION_0	0
+#define E1000_REVISION_1	1
+#define E1000_REVISION_2	2
+#define E1000_REVISION_3	3
+#define E1000_REVISION_4	4
+
+#define E1000_FUNC_0		0
+#define E1000_FUNC_1		1
+#define E1000_FUNC_2		2
+#define E1000_FUNC_3		3
+
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0	0
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1	3
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2	6
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3	9
+
+enum e1000_mac_type {
+	e1000_undefined = 0,
+	e1000_82575,
+	e1000_82576,
+	e1000_82580,
+	e1000_i350,
+	e1000_i354,
+	e1000_i210,
+	e1000_i211,
+	e1000_num_macs  /* List is 1-based, so subtract 1 for true count. */
+};
+
+enum e1000_media_type {
+	e1000_media_type_unknown = 0,
+	e1000_media_type_copper = 1,
+	e1000_media_type_fiber = 2,
+	e1000_media_type_internal_serdes = 3,
+	e1000_num_media_types
+};
+
+enum e1000_nvm_type {
+	e1000_nvm_unknown = 0,
+	e1000_nvm_none,
+	e1000_nvm_eeprom_spi,
+	e1000_nvm_flash_hw,
+	e1000_nvm_invm,
+	e1000_nvm_flash_sw
+};
+
+enum e1000_nvm_override {
+	e1000_nvm_override_none = 0,
+	e1000_nvm_override_spi_small,
+	e1000_nvm_override_spi_large,
+};
+
+enum e1000_phy_type {
+	e1000_phy_unknown = 0,
+	e1000_phy_none,
+	e1000_phy_m88,
+	e1000_phy_igp,
+	e1000_phy_igp_2,
+	e1000_phy_gg82563,
+	e1000_phy_igp_3,
+	e1000_phy_ife,
+	e1000_phy_82580,
+	e1000_phy_vf,
+	e1000_phy_i210,
+};
+
+enum e1000_bus_type {
+	e1000_bus_type_unknown = 0,
+	e1000_bus_type_pci,
+	e1000_bus_type_pcix,
+	e1000_bus_type_pci_express,
+	e1000_bus_type_reserved
+};
+
+enum e1000_bus_speed {
+	e1000_bus_speed_unknown = 0,
+	e1000_bus_speed_33,
+	e1000_bus_speed_66,
+	e1000_bus_speed_100,
+	e1000_bus_speed_120,
+	e1000_bus_speed_133,
+	e1000_bus_speed_2500,
+	e1000_bus_speed_5000,
+	e1000_bus_speed_reserved
+};
+
+enum e1000_bus_width {
+	e1000_bus_width_unknown = 0,
+	e1000_bus_width_pcie_x1,
+	e1000_bus_width_pcie_x2,
+	e1000_bus_width_pcie_x4 = 4,
+	e1000_bus_width_pcie_x8 = 8,
+	e1000_bus_width_32,
+	e1000_bus_width_64,
+	e1000_bus_width_reserved
+};
+
+enum e1000_1000t_rx_status {
+	e1000_1000t_rx_status_not_ok = 0,
+	e1000_1000t_rx_status_ok,
+	e1000_1000t_rx_status_undefined = 0xFF
+};
+
+enum e1000_rev_polarity {
+	e1000_rev_polarity_normal = 0,
+	e1000_rev_polarity_reversed,
+	e1000_rev_polarity_undefined = 0xFF
+};
+
+enum e1000_fc_mode {
+	e1000_fc_none = 0,
+	e1000_fc_rx_pause,
+	e1000_fc_tx_pause,
+	e1000_fc_full,
+	e1000_fc_default = 0xFF
+};
+
+enum e1000_ms_type {
+	e1000_ms_hw_default = 0,
+	e1000_ms_force_master,
+	e1000_ms_force_slave,
+	e1000_ms_auto
+};
+
+enum e1000_smart_speed {
+	e1000_smart_speed_default = 0,
+	e1000_smart_speed_on,
+	e1000_smart_speed_off
+};
+
+enum e1000_serdes_link_state {
+	e1000_serdes_link_down = 0,
+	e1000_serdes_link_autoneg_progress,
+	e1000_serdes_link_autoneg_complete,
+	e1000_serdes_link_forced_up
+};
+
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+/* Receive Descriptor */
+struct e1000_rx_desc {
+	__le64 buffer_addr; /* Address of the descriptor's data buffer */
+	__le16 length;      /* Length of data DMAed into data buffer */
+	__le16 csum; /* Packet checksum */
+	u8  status;  /* Descriptor status */
+	u8  errors;  /* Descriptor Errors */
+	__le16 special;
+};
+
+/* Receive Descriptor - Extended */
+union e1000_rx_desc_extended {
+	struct {
+		__le64 buffer_addr;
+		__le64 reserved;
+	} read;
+	struct {
+		struct {
+			__le32 mrq; /* Multiple Rx Queues */
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id;  /* IP id */
+					__le16 csum;   /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;  /* ext status/error */
+			__le16 length;
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+#define MAX_PS_BUFFERS 4
+
+/* Number of packet split data buffers (not including the header buffer) */
+#define PS_PAGE_BUFFERS	(MAX_PS_BUFFERS - 1)
+
+/* Receive Descriptor - Packet Split */
+union e1000_rx_desc_packet_split {
+	struct {
+		/* one buffer for protocol header(s), three data buffers */
+		__le64 buffer_addr[MAX_PS_BUFFERS];
+	} read;
+	struct {
+		struct {
+			__le32 mrq;  /* Multiple Rx Queues */
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id;    /* IP id */
+					__le16 csum;     /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;  /* ext status/error */
+			__le16 length0;  /* length of buffer 0 */
+			__le16 vlan;  /* VLAN tag */
+		} middle;
+		struct {
+			__le16 header_status;
+			/* length of buffers 1-3 */
+			__le16 length[PS_PAGE_BUFFERS];
+		} upper;
+		__le64 reserved;
+	} wb; /* writeback */
+};
+
+/* Transmit Descriptor */
+struct e1000_tx_desc {
+	__le64 buffer_addr;   /* Address of the descriptor's data buffer */
+	union {
+		__le32 data;
+		struct {
+			__le16 length;  /* Data buffer length */
+			u8 cso;  /* Checksum offset */
+			u8 cmd;  /* Descriptor control */
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status; /* Descriptor status */
+			u8 css;  /* Checksum start */
+			__le16 special;
+		} fields;
+	} upper;
+};
+
+/* Offload Context Descriptor */
+struct e1000_context_desc {
+	union {
+		__le32 ip_config;
+		struct {
+			u8 ipcss;  /* IP checksum start */
+			u8 ipcso;  /* IP checksum offset */
+			__le16 ipcse;  /* IP checksum end */
+		} ip_fields;
+	} lower_setup;
+	union {
+		__le32 tcp_config;
+		struct {
+			u8 tucss;  /* TCP checksum start */
+			u8 tucso;  /* TCP checksum offset */
+			__le16 tucse;  /* TCP checksum end */
+		} tcp_fields;
+	} upper_setup;
+	__le32 cmd_and_length;
+	union {
+		__le32 data;
+		struct {
+			u8 status;  /* Descriptor status */
+			u8 hdr_len;  /* Header length */
+			__le16 mss;  /* Maximum segment size */
+		} fields;
+	} tcp_seg_setup;
+};
+
+/* Offload data descriptor */
+struct e1000_data_desc {
+	__le64 buffer_addr;  /* Address of the descriptor's buffer address */
+	union {
+		__le32 data;
+		struct {
+			__le16 length;  /* Data buffer length */
+			u8 typ_len_ext;
+			u8 cmd;
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status;  /* Descriptor status */
+			u8 popts;  /* Packet Options */
+			__le16 special;
+		} fields;
+	} upper;
+};
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+	u64 cbtmpc;
+	u64 htdpmc;
+	u64 cbrdpc;
+	u64 cbrmpc;
+	u64 rpthc;
+	u64 hgptc;
+	u64 htcbdpc;
+	u64 hgorc;
+	u64 hgotc;
+	u64 lenerrs;
+	u64 scvpc;
+	u64 hrmpc;
+	u64 doosync;
+	u64 o2bgptc;
+	u64 o2bspc;
+	u64 b2ospc;
+	u64 b2ogprc;
+};
+
+
+struct e1000_phy_stats {
+	u32 idle_errors;
+	u32 receive_errors;
+};
+
+struct e1000_host_mng_dhcp_cookie {
+	u32 signature;
+	u8  status;
+	u8  reserved0;
+	u16 vlan_id;
+	u32 reserved1;
+	u16 reserved2;
+	u8  reserved3;
+	u8  checksum;
+};
+
+/* Host Interface "Rev 1" */
+struct e1000_host_command_header {
+	u8 command_id;
+	u8 command_length;
+	u8 command_options;
+	u8 checksum;
+};
+
+#define E1000_HI_MAX_DATA_LENGTH	252
+struct e1000_host_command_info {
+	struct e1000_host_command_header command_header;
+	u8 command_data[E1000_HI_MAX_DATA_LENGTH];
+};
+
+/* Host Interface "Rev 2" */
+struct e1000_host_mng_command_header {
+	u8  command_id;
+	u8  checksum;
+	u16 reserved1;
+	u16 reserved2;
+	u16 command_length;
+};
+
+#define E1000_HI_MAX_MNG_DATA_LENGTH	0x6F8
+struct e1000_host_mng_command_info {
+	struct e1000_host_mng_command_header command_header;
+	u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];
+};
+
+#include "e1000_mac.h"
+#include "e1000_phy.h"
+#include "e1000_nvm.h"
+#include "e1000_manage.h"
+#include "e1000_mbx.h"
+
+/* Function pointers for the MAC. */
+struct e1000_mac_operations {
+	s32  (*init_params)(struct e1000_hw *);
+	s32  (*id_led_init)(struct e1000_hw *);
+	s32  (*blink_led)(struct e1000_hw *);
+	bool (*check_mng_mode)(struct e1000_hw *);
+	s32  (*check_for_link)(struct e1000_hw *);
+	s32  (*cleanup_led)(struct e1000_hw *);
+	void (*clear_hw_cntrs)(struct e1000_hw *);
+	void (*clear_vfta)(struct e1000_hw *);
+	s32  (*get_bus_info)(struct e1000_hw *);
+	void (*set_lan_id)(struct e1000_hw *);
+	s32  (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
+	s32  (*led_on)(struct e1000_hw *);
+	s32  (*led_off)(struct e1000_hw *);
+	void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32);
+	s32  (*reset_hw)(struct e1000_hw *);
+	s32  (*init_hw)(struct e1000_hw *);
+	void (*shutdown_serdes)(struct e1000_hw *);
+	void (*power_up_serdes)(struct e1000_hw *);
+	s32  (*setup_link)(struct e1000_hw *);
+	s32  (*setup_physical_interface)(struct e1000_hw *);
+	s32  (*setup_led)(struct e1000_hw *);
+	void (*write_vfta)(struct e1000_hw *, u32, u32);
+	void (*config_collision_dist)(struct e1000_hw *);
+	void (*rar_set)(struct e1000_hw *, u8*, u32);
+	s32  (*read_mac_addr)(struct e1000_hw *);
+	s32  (*validate_mdi_setting)(struct e1000_hw *);
+	s32 (*get_thermal_sensor_data)(struct e1000_hw *);
+	s32 (*init_thermal_sensor_thresh)(struct e1000_hw *);
+	s32  (*acquire_swfw_sync)(struct e1000_hw *, u16);
+	void (*release_swfw_sync)(struct e1000_hw *, u16);
+};
+
+/* When to use various PHY register access functions:
+ *
+ *                 Func   Caller
+ *   Function      Does   Does    When to use
+ *   ~~~~~~~~~~~~  ~~~~~  ~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *   X_reg         L,P,A  n/a     for simple PHY reg accesses
+ *   X_reg_locked  P,A    L       for multiple accesses of different regs
+ *                                on different pages
+ *   X_reg_page    A      L,P     for multiple accesses of different regs
+ *                                on the same page
+ *
+ * Where X=[read|write], L=locking, P=sets page, A=register access
+ *
+ */
+struct e1000_phy_operations {
+	s32  (*init_params)(struct e1000_hw *);
+	s32  (*acquire)(struct e1000_hw *);
+	s32  (*check_polarity)(struct e1000_hw *);
+	s32  (*check_reset_block)(struct e1000_hw *);
+	s32  (*commit)(struct e1000_hw *);
+	s32  (*force_speed_duplex)(struct e1000_hw *);
+	s32  (*get_cfg_done)(struct e1000_hw *hw);
+	s32  (*get_cable_length)(struct e1000_hw *);
+	s32  (*get_info)(struct e1000_hw *);
+	s32  (*set_page)(struct e1000_hw *, u16);
+	s32  (*read_reg)(struct e1000_hw *, u32, u16 *);
+	s32  (*read_reg_locked)(struct e1000_hw *, u32, u16 *);
+	s32  (*read_reg_page)(struct e1000_hw *, u32, u16 *);
+	void (*release)(struct e1000_hw *);
+	s32  (*reset)(struct e1000_hw *);
+	s32  (*set_d0_lplu_state)(struct e1000_hw *, bool);
+	s32  (*set_d3_lplu_state)(struct e1000_hw *, bool);
+	s32  (*write_reg)(struct e1000_hw *, u32, u16);
+	s32  (*write_reg_locked)(struct e1000_hw *, u32, u16);
+	s32  (*write_reg_page)(struct e1000_hw *, u32, u16);
+	void (*power_up)(struct e1000_hw *);
+	void (*power_down)(struct e1000_hw *);
+	s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *);
+	s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8);
+};
+
+/* Function pointers for the NVM. */
+struct e1000_nvm_operations {
+	s32  (*init_params)(struct e1000_hw *);
+	s32  (*acquire)(struct e1000_hw *);
+	s32  (*read)(struct e1000_hw *, u16, u16, u16 *);
+	void (*release)(struct e1000_hw *);
+	void (*reload)(struct e1000_hw *);
+	s32  (*update)(struct e1000_hw *);
+	s32  (*valid_led_default)(struct e1000_hw *, u16 *);
+	s32  (*validate)(struct e1000_hw *);
+	s32  (*write)(struct e1000_hw *, u16, u16, u16 *);
+};
+
+#define E1000_MAX_SENSORS		3
+
+struct e1000_thermal_diode_data {
+	u8 location;
+	u8 temp;
+	u8 caution_thresh;
+	u8 max_op_thresh;
+};
+
+struct e1000_thermal_sensor_data {
+	struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS];
+};
+
+struct e1000_mac_info {
+	struct e1000_mac_operations ops;
+	u8 addr[ETH_ADDR_LEN];
+	u8 perm_addr[ETH_ADDR_LEN];
+
+	enum e1000_mac_type type;
+
+	u32 collision_delta;
+	u32 ledctl_default;
+	u32 ledctl_mode1;
+	u32 ledctl_mode2;
+	u32 mc_filter_type;
+	u32 tx_packet_delta;
+	u32 txcw;
+
+	u16 current_ifs_val;
+	u16 ifs_max_val;
+	u16 ifs_min_val;
+	u16 ifs_ratio;
+	u16 ifs_step_size;
+	u16 mta_reg_count;
+	u16 uta_reg_count;
+
+	/* Maximum size of the MTA register table in all supported adapters */
+	#define MAX_MTA_REG 128
+	u32 mta_shadow[MAX_MTA_REG];
+	u16 rar_entry_count;
+
+	u8  forced_speed_duplex;
+
+	bool adaptive_ifs;
+	bool has_fwsm;
+	bool arc_subsystem_valid;
+	bool asf_firmware_present;
+	bool autoneg;
+	bool autoneg_failed;
+	bool get_link_status;
+	bool in_ifs_mode;
+	enum e1000_serdes_link_state serdes_link_state;
+	bool serdes_has_link;
+	bool tx_pkt_filtering;
+	struct e1000_thermal_sensor_data thermal_sensor_data;
+};
+
+struct e1000_phy_info {
+	struct e1000_phy_operations ops;
+	enum e1000_phy_type type;
+
+	enum e1000_1000t_rx_status local_rx;
+	enum e1000_1000t_rx_status remote_rx;
+	enum e1000_ms_type ms_type;
+	enum e1000_ms_type original_ms_type;
+	enum e1000_rev_polarity cable_polarity;
+	enum e1000_smart_speed smart_speed;
+
+	u32 addr;
+	u32 id;
+	u32 reset_delay_us; /* in usec */
+	u32 revision;
+
+	enum e1000_media_type media_type;
+
+	u16 autoneg_advertised;
+	u16 autoneg_mask;
+	u16 cable_length;
+	u16 max_cable_length;
+	u16 min_cable_length;
+
+	u8 mdix;
+
+	bool disable_polarity_correction;
+	bool is_mdix;
+	bool polarity_correction;
+	bool reset_disable;
+	bool speed_downgraded;
+	bool autoneg_wait_to_complete;
+};
+
+struct e1000_nvm_info {
+	struct e1000_nvm_operations ops;
+	enum e1000_nvm_type type;
+	enum e1000_nvm_override override;
+
+	u32 flash_bank_size;
+	u32 flash_base_addr;
+
+	u16 word_size;
+	u16 delay_usec;
+	u16 address_bits;
+	u16 opcode_bits;
+	u16 page_size;
+};
+
+struct e1000_bus_info {
+	enum e1000_bus_type type;
+	enum e1000_bus_speed speed;
+	enum e1000_bus_width width;
+
+	u16 func;
+	u16 pci_cmd_word;
+};
+
+struct e1000_fc_info {
+	u32 high_water;  /* Flow control high-water mark */
+	u32 low_water;  /* Flow control low-water mark */
+	u16 pause_time;  /* Flow control pause timer */
+	u16 refresh_time;  /* Flow control refresh timer */
+	bool send_xon;  /* Flow control send XON */
+	bool strict_ieee;  /* Strict IEEE mode */
+	enum e1000_fc_mode current_mode;  /* FC mode in effect */
+	enum e1000_fc_mode requested_mode;  /* FC mode requested by caller */
+};
+
+struct e1000_mbx_operations {
+	s32 (*init_params)(struct e1000_hw *hw);
+	s32 (*read)(struct e1000_hw *, u32 *, u16,  u16);
+	s32 (*write)(struct e1000_hw *, u32 *, u16, u16);
+	s32 (*read_posted)(struct e1000_hw *, u32 *, u16,  u16);
+	s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16);
+	s32 (*check_for_msg)(struct e1000_hw *, u16);
+	s32 (*check_for_ack)(struct e1000_hw *, u16);
+	s32 (*check_for_rst)(struct e1000_hw *, u16);
+};
+
+struct e1000_mbx_stats {
+	u32 msgs_tx;
+	u32 msgs_rx;
+
+	u32 acks;
+	u32 reqs;
+	u32 rsts;
+};
+
+struct e1000_mbx_info {
+	struct e1000_mbx_operations ops;
+	struct e1000_mbx_stats stats;
+	u32 timeout;
+	u32 usec_delay;
+	u16 size;
+};
+
+struct e1000_dev_spec_82575 {
+	bool sgmii_active;
+	bool global_device_reset;
+	bool eee_disable;
+	bool module_plugged;
+	bool clear_semaphore_once;
+	u32 mtu;
+	struct sfp_e1000_flags eth_flags;
+	u8 media_port;
+	bool media_changed;
+};
+
+struct e1000_dev_spec_vf {
+	u32 vf_number;
+	u32 v2p_mailbox;
+};
+
+struct e1000_hw {
+	void *back;
+
+	u8 __iomem *hw_addr;
+	u8 __iomem *flash_address;
+	unsigned long io_base;
+
+	struct e1000_mac_info  mac;
+	struct e1000_fc_info   fc;
+	struct e1000_phy_info  phy;
+	struct e1000_nvm_info  nvm;
+	struct e1000_bus_info  bus;
+	struct e1000_mbx_info mbx;
+	struct e1000_host_mng_dhcp_cookie mng_cookie;
+
+	union {
+		struct e1000_dev_spec_82575 _82575;
+		struct e1000_dev_spec_vf vf;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8  revision_id;
+};
+
+#include "e1000_82575.h"
+#include "e1000_i210.h"
+
+/* These functions must be implemented by drivers */
+s32  e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
+s32  e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
new file mode 100644
index 00000000..1e9f3e6e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
@@ -0,0 +1,909 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+
+static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw);
+static void e1000_release_nvm_i210(struct e1000_hw *hw);
+static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw);
+static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
+				u16 *data);
+static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw);
+static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data);
+
+/**
+ *  e1000_acquire_nvm_i210 - Request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the necessary semaphores for exclusive access to the EEPROM.
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_acquire_nvm_i210");
+
+	ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_release_nvm_i210 - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ *  then release the semaphores acquired.
+ **/
+static void e1000_release_nvm_i210(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_release_nvm_i210");
+
+	e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ *  e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ *  will also specify which port we're acquiring the lock for.
+ **/
+s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 ret_val = E1000_SUCCESS;
+	s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+
+	DEBUGFUNC("e1000_acquire_swfw_sync_i210");
+
+	while (i < timeout) {
+		if (e1000_get_hw_semaphore_i210(hw)) {
+			ret_val = -E1000_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/*
+		 * Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		e1000_put_hw_semaphore_generic(hw);
+		msec_delay_irq(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -E1000_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_release_swfw_sync_i210 - Release SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ *  will also specify which port we're releasing the lock for.
+ **/
+void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	DEBUGFUNC("e1000_release_swfw_sync_i210");
+
+	while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS)
+		; /* Empty */
+
+	swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+}
+
+/**
+ *  e1000_get_hw_semaphore_i210 - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM
+ **/
+static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	DEBUGFUNC("e1000_get_hw_semaphore_i210");
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		usec_delay(50);
+		i++;
+	}
+
+	if (i == timeout) {
+		/* In rare circumstances, the SW semaphore may already be held
+		 * unintentionally. Clear the semaphore once before giving up.
+		 */
+		if (hw->dev_spec._82575.clear_semaphore_once) {
+			hw->dev_spec._82575.clear_semaphore_once = false;
+			e1000_put_hw_semaphore_generic(hw);
+			for (i = 0; i < timeout; i++) {
+				swsm = E1000_READ_REG(hw, E1000_SWSM);
+				if (!(swsm & E1000_SWSM_SMBI))
+					break;
+
+				usec_delay(50);
+			}
+		}
+
+		/* If we do not have the semaphore here, we have to give up. */
+		if (i == timeout) {
+			DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+			return -E1000_ERR_NVM;
+		}
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		usec_delay(50);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_generic(hw);
+		DEBUGOUT("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the Shadow Ram to read
+ *  @words: number of words to read
+ *  @data: word read from the Shadow Ram
+ *
+ *  Reads a 16 bit word from the Shadow Ram using the EERD register.
+ *  Uses necessary synchronization semaphores.
+ **/
+s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words,
+			     u16 *data)
+{
+	s32 status = E1000_SUCCESS;
+	u16 i, count;
+
+	DEBUGFUNC("e1000_read_nvm_srrd_i210");
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to read in bursts than synchronizing access for each word. */
+	for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
+			E1000_EERD_EEWR_MAX_COUNT : (words - i);
+		if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+			status = e1000_read_nvm_eerd(hw, offset, count,
+						     data + i);
+			hw->nvm.ops.release(hw);
+		} else {
+			status = E1000_ERR_SWFW_SYNC;
+		}
+
+		if (status != E1000_SUCCESS)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ *  e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the Shadow RAM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ *  Writes data to Shadow RAM at offset using EEWR register.
+ *
+ *  If e1000_update_nvm_checksum is not called after this function , the
+ *  data will not be committed to FLASH and also Shadow RAM will most likely
+ *  contain an invalid checksum.
+ *
+ *  If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ *  partially written.
+ **/
+s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	s32 status = E1000_SUCCESS;
+	u16 i, count;
+
+	DEBUGFUNC("e1000_write_nvm_srwr_i210");
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to write in bursts than synchronizing access for each word. */
+	for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
+			E1000_EERD_EEWR_MAX_COUNT : (words - i);
+		if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+			status = e1000_write_nvm_srwr(hw, offset, count,
+						      data + i);
+			hw->nvm.ops.release(hw);
+		} else {
+			status = E1000_ERR_SWFW_SYNC;
+		}
+
+		if (status != E1000_SUCCESS)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ *  e1000_write_nvm_srwr - Write to Shadow Ram using EEWR
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the Shadow Ram to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ *  Writes data to Shadow Ram at offset using EEWR register.
+ *
+ *  If e1000_update_nvm_checksum is not called after this function , the
+ *  Shadow Ram will most likely contain an invalid checksum.
+ **/
+static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
+				u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i, k, eewr = 0;
+	u32 attempts = 100000;
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_write_nvm_srwr");
+
+	/*
+	 * A check for invalid values:  offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		DEBUGOUT("nvm parameter(s) out of bounds\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) |
+			(data[i] << E1000_NVM_RW_REG_DATA) |
+			E1000_NVM_RW_REG_START;
+
+		E1000_WRITE_REG(hw, E1000_SRWR, eewr);
+
+		for (k = 0; k < attempts; k++) {
+			if (E1000_NVM_RW_REG_DONE &
+			    E1000_READ_REG(hw, E1000_SRWR)) {
+				ret_val = E1000_SUCCESS;
+				break;
+			}
+			usec_delay(5);
+		}
+
+		if (ret_val != E1000_SUCCESS) {
+			DEBUGOUT("Shadow RAM write EEWR timed out\n");
+			break;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/** e1000_read_invm_word_i210 - Reads OTP
+ *  @hw: pointer to the HW structure
+ *  @address: the word address (aka eeprom offset) to read
+ *  @data: pointer to the data read
+ *
+ *  Reads 16-bit words from the OTP. Return error when the word is not
+ *  stored in OTP.
+ **/
+static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data)
+{
+	s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+	u32 invm_dword;
+	u16 i;
+	u8 record_type, word_address;
+
+	DEBUGFUNC("e1000_read_invm_word_i210");
+
+	for (i = 0; i < E1000_INVM_SIZE; i++) {
+		invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i));
+		/* Get record type */
+		record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword);
+		if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE)
+			break;
+		if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE)
+			i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS;
+		if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE)
+			i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS;
+		if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) {
+			word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword);
+			if (word_address == address) {
+				*data = INVM_DWORD_TO_WORD_DATA(invm_dword);
+				DEBUGOUT2("Read INVM Word 0x%02x = %x",
+					  address, *data);
+				status = E1000_SUCCESS;
+				break;
+			}
+		}
+	}
+	if (status != E1000_SUCCESS)
+		DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address);
+	return status;
+}
+
+/** e1000_read_invm_i210 - Read invm wrapper function for I210/I211
+ *  @hw: pointer to the HW structure
+ *  @address: the word address (aka eeprom offset) to read
+ *  @data: pointer to the data read
+ *
+ *  Wrapper function to return data formerly found in the NVM.
+ **/
+static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset,
+				u16 E1000_UNUSEDARG words, u16 *data)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_read_invm_i210");
+
+	/* Only the MAC addr is required to be present in the iNVM */
+	switch (offset) {
+	case NVM_MAC_ADDR:
+		ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]);
+		ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1,
+						     &data[1]);
+		ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2,
+						     &data[2]);
+		if (ret_val != E1000_SUCCESS)
+			DEBUGOUT("MAC Addr not found in iNVM\n");
+		break;
+	case NVM_INIT_CTRL_2:
+		ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val != E1000_SUCCESS) {
+			*data = NVM_INIT_CTRL_2_DEFAULT_I211;
+			ret_val = E1000_SUCCESS;
+		}
+		break;
+	case NVM_INIT_CTRL_4:
+		ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val != E1000_SUCCESS) {
+			*data = NVM_INIT_CTRL_4_DEFAULT_I211;
+			ret_val = E1000_SUCCESS;
+		}
+		break;
+	case NVM_LED_1_CFG:
+		ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val != E1000_SUCCESS) {
+			*data = NVM_LED_1_CFG_DEFAULT_I211;
+			ret_val = E1000_SUCCESS;
+		}
+		break;
+	case NVM_LED_0_2_CFG:
+		ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val != E1000_SUCCESS) {
+			*data = NVM_LED_0_2_CFG_DEFAULT_I211;
+			ret_val = E1000_SUCCESS;
+		}
+		break;
+	case NVM_ID_LED_SETTINGS:
+		ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val != E1000_SUCCESS) {
+			*data = ID_LED_RESERVED_FFFF;
+			ret_val = E1000_SUCCESS;
+		}
+		break;
+	case NVM_SUB_DEV_ID:
+		*data = hw->subsystem_device_id;
+		break;
+	case NVM_SUB_VEN_ID:
+		*data = hw->subsystem_vendor_id;
+		break;
+	case NVM_DEV_ID:
+		*data = hw->device_id;
+		break;
+	case NVM_VEN_ID:
+		*data = hw->vendor_id;
+		break;
+	default:
+		DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset);
+		*data = NVM_RESERVED_WORD;
+		break;
+	}
+	return ret_val;
+}
+
+/**
+ *  e1000_read_invm_version - Reads iNVM version and image type
+ *  @hw: pointer to the HW structure
+ *  @invm_ver: version structure for the version read
+ *
+ *  Reads iNVM version and image type.
+ **/
+s32 e1000_read_invm_version(struct e1000_hw *hw,
+			    struct e1000_fw_version *invm_ver)
+{
+	u32 *record = NULL;
+	u32 *next_record = NULL;
+	u32 i = 0;
+	u32 invm_dword = 0;
+	u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE /
+					     E1000_INVM_RECORD_SIZE_IN_BYTES);
+	u32 buffer[E1000_INVM_SIZE];
+	s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+	u16 version = 0;
+
+	DEBUGFUNC("e1000_read_invm_version");
+
+	/* Read iNVM memory */
+	for (i = 0; i < E1000_INVM_SIZE; i++) {
+		invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i));
+		buffer[i] = invm_dword;
+	}
+
+	/* Read version number */
+	for (i = 1; i < invm_blocks; i++) {
+		record = &buffer[invm_blocks - i];
+		next_record = &buffer[invm_blocks - i + 1];
+
+		/* Check if we have first version location used */
+		if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) {
+			version = 0;
+			status = E1000_SUCCESS;
+			break;
+		}
+		/* Check if we have second version location used */
+		else if ((i == 1) &&
+			 ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
+			version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+			status = E1000_SUCCESS;
+			break;
+		}
+		/*
+		 * Check if we have odd version location
+		 * used and it is the last one used
+		 */
+		else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) &&
+			 ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) &&
+			 (i != 1))) {
+			version = (*next_record & E1000_INVM_VER_FIELD_TWO)
+				  >> 13;
+			status = E1000_SUCCESS;
+			break;
+		}
+		/*
+		 * Check if we have even version location
+		 * used and it is the last one used
+		 */
+		else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
+			 ((*record & 0x3) == 0)) {
+			version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+			status = E1000_SUCCESS;
+			break;
+		}
+	}
+
+	if (status == E1000_SUCCESS) {
+		invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
+					>> E1000_INVM_MAJOR_SHIFT;
+		invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
+	}
+	/* Read Image Type */
+	for (i = 1; i < invm_blocks; i++) {
+		record = &buffer[invm_blocks - i];
+		next_record = &buffer[invm_blocks - i + 1];
+
+		/* Check if we have image type in first location used */
+		if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) {
+			invm_ver->invm_img_type = 0;
+			status = E1000_SUCCESS;
+			break;
+		}
+		/* Check if we have image type in first location used */
+		else if ((((*record & 0x3) == 0) &&
+			 ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) ||
+			 ((((*record & 0x3) != 0) && (i != 1)))) {
+			invm_ver->invm_img_type =
+				(*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
+			status = E1000_SUCCESS;
+			break;
+		}
+	}
+	return status;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw)
+{
+	s32 status = E1000_SUCCESS;
+	s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *);
+
+	DEBUGFUNC("e1000_validate_nvm_checksum_i210");
+
+	if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+
+		/*
+		 * Replace the read function with semaphore grabbing with
+		 * the one that skips this for a while.
+		 * We have semaphore taken already here.
+		 */
+		read_op_ptr = hw->nvm.ops.read;
+		hw->nvm.ops.read = e1000_read_nvm_eerd;
+
+		status = e1000_validate_nvm_checksum_generic(hw);
+
+		/* Revert original read operation. */
+		hw->nvm.ops.read = read_op_ptr;
+
+		hw->nvm.ops.release(hw);
+	} else {
+		status = E1000_ERR_SWFW_SYNC;
+	}
+
+	return status;
+}
+
+
+/**
+ *  e1000_update_nvm_checksum_i210 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM. Next commit EEPROM data onto the Flash.
+ **/
+s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	DEBUGFUNC("e1000_update_nvm_checksum_i210");
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data);
+	if (ret_val != E1000_SUCCESS) {
+		DEBUGOUT("EEPROM read failed\n");
+		goto out;
+	}
+
+	if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) {
+		/*
+		 * Do not use hw->nvm.ops.write, hw->nvm.ops.read
+		 * because we do not want to take the synchronization
+		 * semaphores twice here.
+		 */
+
+		for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+			ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data);
+			if (ret_val) {
+				hw->nvm.ops.release(hw);
+				DEBUGOUT("NVM Read Error while updating checksum.\n");
+				goto out;
+			}
+			checksum += nvm_data;
+		}
+		checksum = (u16) NVM_SUM - checksum;
+		ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
+						&checksum);
+		if (ret_val != E1000_SUCCESS) {
+			hw->nvm.ops.release(hw);
+			DEBUGOUT("NVM Write Error while updating checksum.\n");
+			goto out;
+		}
+
+		hw->nvm.ops.release(hw);
+
+		ret_val = e1000_update_flash_i210(hw);
+	} else {
+		ret_val = E1000_ERR_SWFW_SYNC;
+	}
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_get_flash_presence_i210 - Check if flash device is detected.
+ *  @hw: pointer to the HW structure
+ *
+ **/
+bool e1000_get_flash_presence_i210(struct e1000_hw *hw)
+{
+	u32 eec = 0;
+	bool ret_val = false;
+
+	DEBUGFUNC("e1000_get_flash_presence_i210");
+
+	eec = E1000_READ_REG(hw, E1000_EECD);
+
+	if (eec & E1000_EECD_FLASH_DETECTED_I210)
+		ret_val = true;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_update_flash_i210 - Commit EEPROM to the flash
+ *  @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000_update_flash_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u32 flup;
+
+	DEBUGFUNC("e1000_update_flash_i210");
+
+	ret_val = e1000_pool_flash_update_done_i210(hw);
+	if (ret_val == -E1000_ERR_NVM) {
+		DEBUGOUT("Flash update time out\n");
+		goto out;
+	}
+
+	flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210;
+	E1000_WRITE_REG(hw, E1000_EECD, flup);
+
+	ret_val = e1000_pool_flash_update_done_i210(hw);
+	if (ret_val == E1000_SUCCESS)
+		DEBUGOUT("Flash update complete\n");
+	else
+		DEBUGOUT("Flash update time out\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_pool_flash_update_done_i210 - Pool FLUDONE status.
+ *  @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_NVM;
+	u32 i, reg;
+
+	DEBUGFUNC("e1000_pool_flash_update_done_i210");
+
+	for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) {
+		reg = E1000_READ_REG(hw, E1000_EECD);
+		if (reg & E1000_EECD_FLUDONE_I210) {
+			ret_val = E1000_SUCCESS;
+			break;
+		}
+		usec_delay(5);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize the i210/i211 NVM parameters and function pointers.
+ **/
+static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	struct e1000_nvm_info *nvm = &hw->nvm;
+
+	DEBUGFUNC("e1000_init_nvm_params_i210");
+
+	ret_val = e1000_init_nvm_params_82575(hw);
+	nvm->ops.acquire = e1000_acquire_nvm_i210;
+	nvm->ops.release = e1000_release_nvm_i210;
+	nvm->ops.valid_led_default = e1000_valid_led_default_i210;
+	if (e1000_get_flash_presence_i210(hw)) {
+		hw->nvm.type = e1000_nvm_flash_hw;
+		nvm->ops.read    = e1000_read_nvm_srrd_i210;
+		nvm->ops.write   = e1000_write_nvm_srwr_i210;
+		nvm->ops.validate = e1000_validate_nvm_checksum_i210;
+		nvm->ops.update   = e1000_update_nvm_checksum_i210;
+	} else {
+		hw->nvm.type = e1000_nvm_invm;
+		nvm->ops.read     = e1000_read_invm_i210;
+		nvm->ops.write    = e1000_null_write_nvm;
+		nvm->ops.validate = e1000_null_ops_generic;
+		nvm->ops.update   = e1000_null_ops_generic;
+	}
+	return ret_val;
+}
+
+/**
+ *  e1000_init_function_pointers_i210 - Init func ptrs.
+ *  @hw: pointer to the HW structure
+ *
+ *  Called to initialize all function pointers and parameters.
+ **/
+void e1000_init_function_pointers_i210(struct e1000_hw *hw)
+{
+	e1000_init_function_pointers_82575(hw);
+	hw->nvm.ops.init_params = e1000_init_nvm_params_i210;
+
+	return;
+}
+
+/**
+ *  e1000_valid_led_default_i210 - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_valid_led_default_i210");
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		goto out;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
+		switch (hw->phy.media_type) {
+		case e1000_media_type_internal_serdes:
+			*data = ID_LED_DEFAULT_I210_SERDES;
+			break;
+		case e1000_media_type_copper:
+		default:
+			*data = ID_LED_DEFAULT_I210;
+			break;
+		}
+	}
+out:
+	return ret_val;
+}
+
+/**
+ *  __e1000_access_xmdio_reg - Read/write XMDIO register
+ *  @hw: pointer to the HW structure
+ *  @address: XMDIO address to program
+ *  @dev_addr: device address to program
+ *  @data: pointer to value to read/write from/to the XMDIO address
+ *  @read: boolean flag to indicate read or write
+ **/
+static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address,
+				    u8 dev_addr, u16 *data, bool read)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("__e1000_access_xmdio_reg");
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA |
+							 dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data);
+	else
+		ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data);
+	if (ret_val)
+		return ret_val;
+
+	/* Recalibrate the device back to 0 */
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0);
+	if (ret_val)
+		return ret_val;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_xmdio_reg - Read XMDIO register
+ *  @hw: pointer to the HW structure
+ *  @addr: XMDIO address to program
+ *  @dev_addr: device address to program
+ *  @data: value to be read from the EMI address
+ **/
+s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data)
+{
+	DEBUGFUNC("e1000_read_xmdio_reg");
+
+	return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, true);
+}
+
+/**
+ *  e1000_write_xmdio_reg - Write XMDIO register
+ *  @hw: pointer to the HW structure
+ *  @addr: XMDIO address to program
+ *  @dev_addr: device address to program
+ *  @data: value to be written to the XMDIO address
+ **/
+s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data)
+{
+	DEBUGFUNC("e1000_read_xmdio_reg");
+
+	return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, false);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
new file mode 100644
index 00000000..57b2eb56
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
@@ -0,0 +1,91 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_I210_H_
+#define _E1000_I210_H_
+
+bool e1000_get_flash_presence_i210(struct e1000_hw *hw);
+s32 e1000_update_flash_i210(struct e1000_hw *hw);
+s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw);
+s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw);
+s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset,
+			      u16 words, u16 *data);
+s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset,
+			     u16 words, u16 *data);
+s32 e1000_read_invm_version(struct e1000_hw *hw,
+			    struct e1000_fw_version *invm_ver);
+s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
+			 u16 *data);
+s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
+			  u16 data);
+
+#define E1000_STM_OPCODE		0xDB00
+#define E1000_EEPROM_FLASH_SIZE_WORD	0x11
+
+#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \
+	(u8)((invm_dword) & 0x7)
+#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \
+	(u8)(((invm_dword) & 0x0000FE00) >> 9)
+#define INVM_DWORD_TO_WORD_DATA(invm_dword) \
+	(u16)(((invm_dword) & 0xFFFF0000) >> 16)
+
+enum E1000_INVM_STRUCTURE_TYPE {
+	E1000_INVM_UNINITIALIZED_STRUCTURE		= 0x00,
+	E1000_INVM_WORD_AUTOLOAD_STRUCTURE		= 0x01,
+	E1000_INVM_CSR_AUTOLOAD_STRUCTURE		= 0x02,
+	E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE	= 0x03,
+	E1000_INVM_RSA_KEY_SHA256_STRUCTURE		= 0x04,
+	E1000_INVM_INVALIDATED_STRUCTURE		= 0x0F,
+};
+
+#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS	8
+#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS	1
+#define E1000_INVM_ULT_BYTES_SIZE	8
+#define E1000_INVM_RECORD_SIZE_IN_BYTES	4
+#define E1000_INVM_VER_FIELD_ONE	0x1FF8
+#define E1000_INVM_VER_FIELD_TWO	0x7FE000
+#define E1000_INVM_IMGTYPE_FIELD	0x1F800000
+
+#define E1000_INVM_MAJOR_MASK	0x3F0
+#define E1000_INVM_MINOR_MASK	0xF
+#define E1000_INVM_MAJOR_SHIFT	4
+
+#define ID_LED_DEFAULT_I210		((ID_LED_OFF1_ON2  << 8) | \
+					 (ID_LED_DEF1_DEF2 <<  4) | \
+					 (ID_LED_OFF1_OFF2))
+#define ID_LED_DEFAULT_I210_SERDES	((ID_LED_DEF1_DEF2 << 8) | \
+					 (ID_LED_DEF1_DEF2 <<  4) | \
+					 (ID_LED_OFF1_ON2))
+
+/* NVM offset defaults for I211 devices */
+#define NVM_INIT_CTRL_2_DEFAULT_I211	0X7243
+#define NVM_INIT_CTRL_4_DEFAULT_I211	0x00C1
+#define NVM_LED_1_CFG_DEFAULT_I211	0x0184
+#define NVM_LED_0_2_CFG_DEFAULT_I211	0x200C
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
new file mode 100644
index 00000000..4ee59ba9
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
@@ -0,0 +1,2096 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw);
+static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw);
+static void e1000_config_collision_dist_generic(struct e1000_hw *hw);
+static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index);
+
+/**
+ *  e1000_init_mac_ops_generic - Initialize MAC function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Setups up the function pointers to no-op functions
+ **/
+void e1000_init_mac_ops_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	DEBUGFUNC("e1000_init_mac_ops_generic");
+
+	/* General Setup */
+	mac->ops.init_params = e1000_null_ops_generic;
+	mac->ops.init_hw = e1000_null_ops_generic;
+	mac->ops.reset_hw = e1000_null_ops_generic;
+	mac->ops.setup_physical_interface = e1000_null_ops_generic;
+	mac->ops.get_bus_info = e1000_null_ops_generic;
+	mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie;
+	mac->ops.read_mac_addr = e1000_read_mac_addr_generic;
+	mac->ops.config_collision_dist = e1000_config_collision_dist_generic;
+	mac->ops.clear_hw_cntrs = e1000_null_mac_generic;
+	/* LED */
+	mac->ops.cleanup_led = e1000_null_ops_generic;
+	mac->ops.setup_led = e1000_null_ops_generic;
+	mac->ops.blink_led = e1000_null_ops_generic;
+	mac->ops.led_on = e1000_null_ops_generic;
+	mac->ops.led_off = e1000_null_ops_generic;
+	/* LINK */
+	mac->ops.setup_link = e1000_null_ops_generic;
+	mac->ops.get_link_up_info = e1000_null_link_info;
+	mac->ops.check_for_link = e1000_null_ops_generic;
+	/* Management */
+	mac->ops.check_mng_mode = e1000_null_mng_mode;
+	/* VLAN, MC, etc. */
+	mac->ops.update_mc_addr_list = e1000_null_update_mc;
+	mac->ops.clear_vfta = e1000_null_mac_generic;
+	mac->ops.write_vfta = e1000_null_write_vfta;
+	mac->ops.rar_set = e1000_rar_set_generic;
+	mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic;
+}
+
+/**
+ *  e1000_null_ops_generic - No-op function, returns 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_null_ops_generic");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_mac_generic - No-op function, return void
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_null_mac_generic");
+	return;
+}
+
+/**
+ *  e1000_null_link_info - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw,
+			 u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d)
+{
+	DEBUGFUNC("e1000_null_link_info");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_mng_mode - No-op function, return false
+ *  @hw: pointer to the HW structure
+ **/
+bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_null_mng_mode");
+	return false;
+}
+
+/**
+ *  e1000_null_update_mc - No-op function, return void
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw,
+			  u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a)
+{
+	DEBUGFUNC("e1000_null_update_mc");
+	return;
+}
+
+/**
+ *  e1000_null_write_vfta - No-op function, return void
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw,
+			   u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b)
+{
+	DEBUGFUNC("e1000_null_write_vfta");
+	return;
+}
+
+/**
+ *  e1000_null_rar_set - No-op function, return void
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw,
+			u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a)
+{
+	DEBUGFUNC("e1000_null_rar_set");
+	return;
+}
+
+/**
+ *  e1000_get_bus_info_pcie_generic - Get PCIe bus information
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines and stores the system bus information for a particular
+ *  network interface.  The following bus information is determined and stored:
+ *  bus speed, bus width, type (PCIe), and PCIe function.
+ **/
+s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_bus_info *bus = &hw->bus;
+	s32 ret_val;
+	u16 pcie_link_status;
+
+	DEBUGFUNC("e1000_get_bus_info_pcie_generic");
+
+	bus->type = e1000_bus_type_pci_express;
+
+	ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS,
+					  &pcie_link_status);
+	if (ret_val) {
+		bus->width = e1000_bus_width_unknown;
+		bus->speed = e1000_bus_speed_unknown;
+	} else {
+		switch (pcie_link_status & PCIE_LINK_SPEED_MASK) {
+		case PCIE_LINK_SPEED_2500:
+			bus->speed = e1000_bus_speed_2500;
+			break;
+		case PCIE_LINK_SPEED_5000:
+			bus->speed = e1000_bus_speed_5000;
+			break;
+		default:
+			bus->speed = e1000_bus_speed_unknown;
+			break;
+		}
+
+		bus->width = (enum e1000_bus_width)((pcie_link_status &
+			      PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT);
+	}
+
+	mac->ops.set_lan_id(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ *
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines the LAN function id by reading memory-mapped registers
+ *  and swaps the port value if requested.
+ **/
+static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+	u32 reg;
+
+	/* The status register reports the correct function number
+	 * for the device regardless of function swap state.
+	 */
+	reg = E1000_READ_REG(hw, E1000_STATUS);
+	bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+}
+
+/**
+ *  e1000_set_lan_id_single_port - Set LAN id for a single port device
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets the LAN function id to zero for a single port device.
+ **/
+void e1000_set_lan_id_single_port(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+
+	bus->func = 0;
+}
+
+/**
+ *  e1000_clear_vfta_generic - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the register array which contains the VLAN filter table by
+ *  setting all the values to 0.
+ **/
+void e1000_clear_vfta_generic(struct e1000_hw *hw)
+{
+	u32 offset;
+
+	DEBUGFUNC("e1000_clear_vfta_generic");
+
+	for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
+		E1000_WRITE_FLUSH(hw);
+	}
+}
+
+/**
+ *  e1000_write_vfta_generic - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset in VLAN filter table
+ *  @value: register value written to VLAN filter table
+ *
+ *  Writes value at the given offset in the register array which stores
+ *  the VLAN filter table.
+ **/
+void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	DEBUGFUNC("e1000_write_vfta_generic");
+
+	E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ *  e1000_init_rx_addrs_generic - Initialize receive address's
+ *  @hw: pointer to the HW structure
+ *  @rar_count: receive address registers
+ *
+ *  Setup the receive address registers by setting the base receive address
+ *  register to the devices MAC address and clearing all the other receive
+ *  address registers to 0.
+ **/
+void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count)
+{
+	u32 i;
+	u8 mac_addr[ETH_ADDR_LEN] = {0};
+
+	DEBUGFUNC("e1000_init_rx_addrs_generic");
+
+	/* Setup the receive address */
+	DEBUGOUT("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ *  e1000_check_alt_mac_addr_generic - Check for alternate MAC addr
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks the nvm for an alternate MAC address.  An alternate MAC address
+ *  can be setup by pre-boot software and must be treated like a permanent
+ *  address and must override the actual permanent MAC address. If an
+ *  alternate MAC address is found it is programmed into RAR0, replacing
+ *  the permanent address that was installed into RAR0 by the Si on reset.
+ *  This function will return SUCCESS unless it encounters an error while
+ *  reading the EEPROM.
+ **/
+s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
+{
+	u32 i;
+	s32 ret_val;
+	u16 offset, nvm_alt_mac_addr_offset, nvm_data;
+	u8 alt_mac_addr[ETH_ADDR_LEN];
+
+	DEBUGFUNC("e1000_check_alt_mac_addr_generic");
+
+	ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data);
+	if (ret_val)
+		return ret_val;
+
+
+	/* Alternate MAC address is handled by the option ROM for 82580
+	 * and newer. SW support not required.
+	 */
+	if (hw->mac.type >= e1000_82580)
+		return E1000_SUCCESS;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1,
+				   &nvm_alt_mac_addr_offset);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if ((nvm_alt_mac_addr_offset == 0xFFFF) ||
+	    (nvm_alt_mac_addr_offset == 0x0000))
+		/* There is no Alternate MAC Address */
+		return E1000_SUCCESS;
+
+	if (hw->bus.func == E1000_FUNC_1)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
+	if (hw->bus.func == E1000_FUNC_2)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2;
+
+	if (hw->bus.func == E1000_FUNC_3)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3;
+	for (i = 0; i < ETH_ADDR_LEN; i += 2) {
+		offset = nvm_alt_mac_addr_offset + (i >> 1);
+		ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Read Error\n");
+			return ret_val;
+		}
+
+		alt_mac_addr[i] = (u8)(nvm_data & 0xFF);
+		alt_mac_addr[i + 1] = (u8)(nvm_data >> 8);
+	}
+
+	/* if multicast bit is set, the alternate address will not be used */
+	if (alt_mac_addr[0] & 0x01) {
+		DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n");
+		return E1000_SUCCESS;
+	}
+
+	/* We have a valid alternate MAC address, and we want to treat it the
+	 * same as the normal permanent MAC address stored by the HW into the
+	 * RAR. Do this by mapping this address into RAR0.
+	 */
+	hw->mac.ops.rar_set(hw, alt_mac_addr, 0);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_rar_set_generic - Set receive address register
+ *  @hw: pointer to the HW structure
+ *  @addr: pointer to the receive address
+ *  @index: receive address array register
+ *
+ *  Sets the receive address array register at index to the address passed
+ *  in by addr.
+ **/
+static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	DEBUGFUNC("e1000_rar_set_generic");
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
+		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+
+	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= E1000_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
+	E1000_WRITE_FLUSH(hw);
+	E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ *  e1000_hash_mc_addr_generic - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.
+ **/
+u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value, hash_mask;
+	u8 bit_shift = 0;
+
+	DEBUGFUNC("e1000_hash_mc_addr_generic");
+
+	/* Register count multiplied by bits per register */
+	hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+	/* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+	 * where 0xFF would still fall within the hash mask.
+	 */
+	while (hash_mask >> bit_shift != 0xFF)
+		bit_shift++;
+
+	/* The portion of the address that is used for the hash table
+	 * is determined by the mc_filter_type setting.
+	 * The algorithm is such that there is a total of 8 bits of shifting.
+	 * The bit_shift for a mc_filter_type of 0 represents the number of
+	 * left-shifts where the MSB of mc_addr[5] would still fall within
+	 * the hash_mask.  Case 0 does this exactly.  Since there are a total
+	 * of 8 bits of shifting, then mc_addr[4] will shift right the
+	 * remaining number of bits. Thus 8 - bit_shift.  The rest of the
+	 * cases are a variation of this algorithm...essentially raising the
+	 * number of bits to shift mc_addr[5] left, while still keeping the
+	 * 8-bit shifting total.
+	 *
+	 * For example, given the following Destination MAC Address and an
+	 * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+	 * we can see that the bit_shift for case 0 is 4.  These are the hash
+	 * values resulting from each mc_filter_type...
+	 * [0] [1] [2] [3] [4] [5]
+	 * 01  AA  00  12  34  56
+	 * LSB		 MSB
+	 *
+	 * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+	 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+	 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+	 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+	 */
+	switch (hw->mac.mc_filter_type) {
+	default:
+	case 0:
+		break;
+	case 1:
+		bit_shift += 1;
+		break;
+	case 2:
+		bit_shift += 2;
+		break;
+	case 3:
+		bit_shift += 4;
+		break;
+	}
+
+	hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+				  (((u16) mc_addr[5]) << bit_shift)));
+
+	return hash_value;
+}
+
+/**
+ *  e1000_update_mc_addr_list_generic - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
+				       u8 *mc_addr_list, u32 mc_addr_count)
+{
+	u32 hash_value, hash_bit, hash_reg;
+	int i;
+
+	DEBUGFUNC("e1000_update_mc_addr_list_generic");
+
+	/* clear mta_shadow */
+	memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+	/* update mta_shadow from mc_addr_list */
+	for (i = 0; (u32) i < mc_addr_count; i++) {
+		hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list);
+
+		hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+		hash_bit = hash_value & 0x1F;
+
+		hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit);
+		mc_addr_list += (ETH_ADDR_LEN);
+	}
+
+	/* replace the entire MTA table */
+	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ *  e1000_clear_hw_cntrs_base_generic - Clear base hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the base hardware counters by reading the counter registers.
+ **/
+void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_clear_hw_cntrs_base_generic");
+
+	E1000_READ_REG(hw, E1000_CRCERRS);
+	E1000_READ_REG(hw, E1000_SYMERRS);
+	E1000_READ_REG(hw, E1000_MPC);
+	E1000_READ_REG(hw, E1000_SCC);
+	E1000_READ_REG(hw, E1000_ECOL);
+	E1000_READ_REG(hw, E1000_MCC);
+	E1000_READ_REG(hw, E1000_LATECOL);
+	E1000_READ_REG(hw, E1000_COLC);
+	E1000_READ_REG(hw, E1000_DC);
+	E1000_READ_REG(hw, E1000_SEC);
+	E1000_READ_REG(hw, E1000_RLEC);
+	E1000_READ_REG(hw, E1000_XONRXC);
+	E1000_READ_REG(hw, E1000_XONTXC);
+	E1000_READ_REG(hw, E1000_XOFFRXC);
+	E1000_READ_REG(hw, E1000_XOFFTXC);
+	E1000_READ_REG(hw, E1000_FCRUC);
+	E1000_READ_REG(hw, E1000_GPRC);
+	E1000_READ_REG(hw, E1000_BPRC);
+	E1000_READ_REG(hw, E1000_MPRC);
+	E1000_READ_REG(hw, E1000_GPTC);
+	E1000_READ_REG(hw, E1000_GORCL);
+	E1000_READ_REG(hw, E1000_GORCH);
+	E1000_READ_REG(hw, E1000_GOTCL);
+	E1000_READ_REG(hw, E1000_GOTCH);
+	E1000_READ_REG(hw, E1000_RNBC);
+	E1000_READ_REG(hw, E1000_RUC);
+	E1000_READ_REG(hw, E1000_RFC);
+	E1000_READ_REG(hw, E1000_ROC);
+	E1000_READ_REG(hw, E1000_RJC);
+	E1000_READ_REG(hw, E1000_TORL);
+	E1000_READ_REG(hw, E1000_TORH);
+	E1000_READ_REG(hw, E1000_TOTL);
+	E1000_READ_REG(hw, E1000_TOTH);
+	E1000_READ_REG(hw, E1000_TPR);
+	E1000_READ_REG(hw, E1000_TPT);
+	E1000_READ_REG(hw, E1000_MPTC);
+	E1000_READ_REG(hw, E1000_BPTC);
+}
+
+/**
+ *  e1000_check_for_copper_link_generic - Check for link (Copper)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks to see of the link status of the hardware has changed.  If a
+ *  change in link status has been detected, then we read the PHY registers
+ *  to get the current speed/duplex if link exists.
+ **/
+s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	bool link;
+
+	DEBUGFUNC("e1000_check_for_copper_link");
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed.  The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status)
+		return E1000_SUCCESS;
+
+	/* First we want to see if the MII Status Register reports
+	 * link.  If so, then we want to get the current speed/duplex
+	 * of the PHY.
+	 */
+	ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link)
+		return E1000_SUCCESS; /* No link detected */
+
+	mac->get_link_status = false;
+
+	/* Check if there was DownShift, must be checked
+	 * immediately after link-up
+	 */
+	e1000_check_downshift_generic(hw);
+
+	/* If we are forcing speed/duplex, then we simply return since
+	 * we have already determined whether we have link or not.
+	 */
+	if (!mac->autoneg)
+		return -E1000_ERR_CONFIG;
+
+	/* Auto-Neg is enabled.  Auto Speed Detection takes care
+	 * of MAC speed/duplex configuration.  So we only need to
+	 * configure Collision Distance in the MAC.
+	 */
+	mac->ops.config_collision_dist(hw);
+
+	/* Configure Flow Control now that Auto-Neg has completed.
+	 * First, we need to restore the desired flow control
+	 * settings because we may have had to re-autoneg with a
+	 * different link partner.
+	 */
+	ret_val = e1000_config_fc_after_link_up_generic(hw);
+	if (ret_val)
+		DEBUGOUT("Error configuring flow control\n");
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_fiber_link_generic - Check for link (Fiber)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks for link up on the hardware.  If link is not up and we have
+ *  a signal, then we need to force link up.
+ **/
+s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 rxcw;
+	u32 ctrl;
+	u32 status;
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_check_for_fiber_link_generic");
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+	status = E1000_READ_REG(hw, E1000_STATUS);
+	rxcw = E1000_READ_REG(hw, E1000_RXCW);
+
+	/* If we don't have link (auto-negotiation failed or link partner
+	 * cannot auto-negotiate), the cable is plugged in (we have signal),
+	 * and our link partner is not trying to auto-negotiate with us (we
+	 * are receiving idles or data), we need to force link up. We also
+	 * need to give auto-negotiation time to complete, in case the cable
+	 * was just plugged in. The autoneg_failed flag does this.
+	 */
+	/* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+	if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) &&
+	    !(rxcw & E1000_RXCW_C)) {
+		if (!mac->autoneg_failed) {
+			mac->autoneg_failed = true;
+			return E1000_SUCCESS;
+		}
+		DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
+
+		/* Disable auto-negotiation in the TXCW register */
+		E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+
+		/* Force link-up and also force full-duplex. */
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+		/* Configure Flow Control after forcing link up. */
+		ret_val = e1000_config_fc_after_link_up_generic(hw);
+		if (ret_val) {
+			DEBUGOUT("Error configuring flow control\n");
+			return ret_val;
+		}
+	} else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+		/* If we are forcing link and we are receiving /C/ ordered
+		 * sets, re-enable auto-negotiation in the TXCW register
+		 * and disable forced link in the Device Control register
+		 * in an attempt to auto-negotiate with our link partner.
+		 */
+		DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
+		E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw);
+		E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+		mac->serdes_has_link = true;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_check_for_serdes_link_generic - Check for link (Serdes)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks for link up on the hardware.  If link is not up and we have
+ *  a signal, then we need to force link up.
+ **/
+s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 rxcw;
+	u32 ctrl;
+	u32 status;
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_check_for_serdes_link_generic");
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+	status = E1000_READ_REG(hw, E1000_STATUS);
+	rxcw = E1000_READ_REG(hw, E1000_RXCW);
+
+	/* If we don't have link (auto-negotiation failed or link partner
+	 * cannot auto-negotiate), and our link partner is not trying to
+	 * auto-negotiate with us (we are receiving idles or data),
+	 * we need to force link up. We also need to give auto-negotiation
+	 * time to complete.
+	 */
+	/* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+	if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) {
+		if (!mac->autoneg_failed) {
+			mac->autoneg_failed = true;
+			return E1000_SUCCESS;
+		}
+		DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
+
+		/* Disable auto-negotiation in the TXCW register */
+		E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+
+		/* Force link-up and also force full-duplex. */
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+		/* Configure Flow Control after forcing link up. */
+		ret_val = e1000_config_fc_after_link_up_generic(hw);
+		if (ret_val) {
+			DEBUGOUT("Error configuring flow control\n");
+			return ret_val;
+		}
+	} else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+		/* If we are forcing link and we are receiving /C/ ordered
+		 * sets, re-enable auto-negotiation in the TXCW register
+		 * and disable forced link in the Device Control register
+		 * in an attempt to auto-negotiate with our link partner.
+		 */
+		DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
+		E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw);
+		E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+		mac->serdes_has_link = true;
+	} else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) {
+		/* If we force link for non-auto-negotiation switch, check
+		 * link status based on MAC synchronization for internal
+		 * serdes media type.
+		 */
+		/* SYNCH bit and IV bit are sticky. */
+		usec_delay(10);
+		rxcw = E1000_READ_REG(hw, E1000_RXCW);
+		if (rxcw & E1000_RXCW_SYNCH) {
+			if (!(rxcw & E1000_RXCW_IV)) {
+				mac->serdes_has_link = true;
+				DEBUGOUT("SERDES: Link up - forced.\n");
+			}
+		} else {
+			mac->serdes_has_link = false;
+			DEBUGOUT("SERDES: Link down - force failed.\n");
+		}
+	}
+
+	if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) {
+		status = E1000_READ_REG(hw, E1000_STATUS);
+		if (status & E1000_STATUS_LU) {
+			/* SYNCH bit and IV bit are sticky, so reread rxcw. */
+			usec_delay(10);
+			rxcw = E1000_READ_REG(hw, E1000_RXCW);
+			if (rxcw & E1000_RXCW_SYNCH) {
+				if (!(rxcw & E1000_RXCW_IV)) {
+					mac->serdes_has_link = true;
+					DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n");
+				} else {
+					mac->serdes_has_link = false;
+					DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n");
+				}
+			} else {
+				mac->serdes_has_link = false;
+				DEBUGOUT("SERDES: Link down - no sync.\n");
+			}
+		} else {
+			mac->serdes_has_link = false;
+			DEBUGOUT("SERDES: Link down - autoneg failed\n");
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_set_default_fc_generic - Set flow control default values
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the EEPROM for the default values for flow control and store the
+ *  values.
+ **/
+static s32 e1000_set_default_fc_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 nvm_data;
+
+	DEBUGFUNC("e1000_set_default_fc_generic");
+
+	/* Read and store word 0x0F of the EEPROM. This word contains bits
+	 * that determine the hardware's default PAUSE (flow control) mode,
+	 * a bit that determines whether the HW defaults to enabling or
+	 * disabling auto-negotiation, and the direction of the
+	 * SW defined pins. If there is no SW over-ride of the flow
+	 * control setting, then the variable hw->fc will
+	 * be initialized based on a value in the EEPROM.
+	 */
+	ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data);
+
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (!(nvm_data & NVM_WORD0F_PAUSE_MASK))
+		hw->fc.requested_mode = e1000_fc_none;
+	else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) ==
+		 NVM_WORD0F_ASM_DIR)
+		hw->fc.requested_mode = e1000_fc_tx_pause;
+	else
+		hw->fc.requested_mode = e1000_fc_full;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_setup_link_generic - Setup flow control and link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines which flow control settings to use, then configures flow
+ *  control.  Calls the appropriate media-specific link configuration
+ *  function.  Assuming the adapter has a valid link partner, a valid link
+ *  should be established.  Assumes the hardware has previously been reset
+ *  and the transmitter and receiver are not enabled.
+ **/
+s32 e1000_setup_link_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_setup_link_generic");
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
+		return E1000_SUCCESS;
+
+	/* If requested flow control is set to default, set flow control
+	 * based on the EEPROM flow control settings.
+	 */
+	if (hw->fc.requested_mode == e1000_fc_default) {
+		ret_val = e1000_set_default_fc_generic(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Save off the requested flow control mode for use later.  Depending
+	 * on the link partner's capabilities, we may or may not use this mode.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	DEBUGOUT1("After fix-ups FlowControl is now = %x\n",
+		hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values.  This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	DEBUGOUT("Initializing the Flow Control address, type and timer regs\n");
+	E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE);
+	E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time);
+
+	return e1000_set_fc_watermarks_generic(hw);
+}
+
+/**
+ *  e1000_commit_fc_settings_generic - Configure flow control
+ *  @hw: pointer to the HW structure
+ *
+ *  Write the flow control settings to the Transmit Config Word Register (TXCW)
+ *  base on the flow control settings in e1000_mac_info.
+ **/
+static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 txcw;
+
+	DEBUGFUNC("e1000_commit_fc_settings_generic");
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the device accordingly.  If auto-negotiation is enabled, then
+	 * software will have to set the "PAUSE" bits to the correct value in
+	 * the Transmit Config Word Register (TXCW) and re-start auto-
+	 * negotiation.  However, if auto-negotiation is disabled, then
+	 * software will have to manually configure the two flow control enable
+	 * bits in the CTRL register.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames,
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames but we
+	 *          do not support receiving pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) are enabled.
+	 */
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		/* Flow control completely disabled by a software over-ride. */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
+		break;
+	case e1000_fc_rx_pause:
+		/* Rx Flow control is enabled and Tx Flow control is disabled
+		 * by a software over-ride. Since there really isn't a way to
+		 * advertise that we are capable of Rx Pause ONLY, we will
+		 * advertise that we support both symmetric and asymmetric Rx
+		 * PAUSE.  Later, we will disable the adapter's ability to send
+		 * PAUSE frames.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+		break;
+	case e1000_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is disabled,
+		 * by a software over-ride.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
+		break;
+	case e1000_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+		break;
+	default:
+		DEBUGOUT("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+		break;
+	}
+
+	E1000_WRITE_REG(hw, E1000_TXCW, txcw);
+	mac->txcw = txcw;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_poll_fiber_serdes_link_generic - Poll for link up
+ *  @hw: pointer to the HW structure
+ *
+ *  Polls for link up by reading the status register, if link fails to come
+ *  up with auto-negotiation, then the link is forced if a signal is detected.
+ **/
+static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 i, status;
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_poll_fiber_serdes_link_generic");
+
+	/* If we have a signal (the cable is plugged in, or assumed true for
+	 * serdes media) then poll for a "Link-Up" indication in the Device
+	 * Status Register.  Time-out if a link isn't seen in 500 milliseconds
+	 * seconds (Auto-negotiation should complete in less than 500
+	 * milliseconds even if the other end is doing it in SW).
+	 */
+	for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
+		msec_delay(10);
+		status = E1000_READ_REG(hw, E1000_STATUS);
+		if (status & E1000_STATUS_LU)
+			break;
+	}
+	if (i == FIBER_LINK_UP_LIMIT) {
+		DEBUGOUT("Never got a valid link from auto-neg!!!\n");
+		mac->autoneg_failed = true;
+		/* AutoNeg failed to achieve a link, so we'll call
+		 * mac->check_for_link. This routine will force the
+		 * link up if we detect a signal. This will allow us to
+		 * communicate with non-autonegotiating link partners.
+		 */
+		ret_val = mac->ops.check_for_link(hw);
+		if (ret_val) {
+			DEBUGOUT("Error while checking for link\n");
+			return ret_val;
+		}
+		mac->autoneg_failed = false;
+	} else {
+		mac->autoneg_failed = false;
+		DEBUGOUT("Valid Link Found\n");
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures collision distance and flow control for fiber and serdes
+ *  links.  Upon successful setup, poll for link.
+ **/
+s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_setup_fiber_serdes_link_generic");
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+	/* Take the link out of reset */
+	ctrl &= ~E1000_CTRL_LRST;
+
+	hw->mac.ops.config_collision_dist(hw);
+
+	ret_val = e1000_commit_fc_settings_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Since auto-negotiation is enabled, take the link out of reset (the
+	 * link will be in reset, because we previously reset the chip). This
+	 * will restart auto-negotiation.  If auto-negotiation is successful
+	 * then the link-up status bit will be set and the flow control enable
+	 * bits (RFCE and TFCE) will be set according to their negotiated value.
+	 */
+	DEBUGOUT("Auto-negotiation enabled\n");
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+	E1000_WRITE_FLUSH(hw);
+	msec_delay(1);
+
+	/* For these adapters, the SW definable pin 1 is set when the optics
+	 * detect a signal.  If we have a signal, then poll for a "Link-Up"
+	 * indication.
+	 */
+	if (hw->phy.media_type == e1000_media_type_internal_serdes ||
+	    (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) {
+		ret_val = e1000_poll_fiber_serdes_link_generic(hw);
+	} else {
+		DEBUGOUT("No signal detected\n");
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_config_collision_dist_generic - Configure collision distance
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the collision distance to the default value and is used
+ *  during link setup.
+ **/
+static void e1000_config_collision_dist_generic(struct e1000_hw *hw)
+{
+	u32 tctl;
+
+	DEBUGFUNC("e1000_config_collision_dist_generic");
+
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+
+	tctl &= ~E1000_TCTL_COLD;
+	tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
+
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ *  e1000_set_fc_watermarks_generic - Set flow control high/low watermarks
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets the flow control high/low threshold (watermark) registers.  If
+ *  flow control XON frame transmission is enabled, then set XON frame
+ *  transmission as well.
+ **/
+s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	DEBUGFUNC("e1000_set_fc_watermarks_generic");
+
+	/* Set the flow control receive threshold registers.  Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code.  However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & e1000_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= E1000_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl);
+	E1000_WRITE_REG(hw, E1000_FCRTH, fcrth);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_force_mac_fc_generic - Force the MAC's flow control settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Force the MAC's flow control settings.  Sets the TFCE and RFCE bits in the
+ *  device control register to reflect the adapter settings.  TFCE and RFCE
+ *  need to be explicitly set by software when a copper PHY is used because
+ *  autonegotiation is managed by the PHY rather than the MAC.  Software must
+ *  also configure these bits when link is forced on a fiber connection.
+ **/
+s32 e1000_force_mac_fc_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	DEBUGFUNC("e1000_force_mac_fc_generic");
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit an
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disable flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause
+	 *          frames but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          frames but we do not receive pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) is enabled.
+	 *  other:  No other values should be possible at this point.
+	 */
+	DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+		break;
+	case e1000_fc_rx_pause:
+		ctrl &= (~E1000_CTRL_TFCE);
+		ctrl |= E1000_CTRL_RFCE;
+		break;
+	case e1000_fc_tx_pause:
+		ctrl &= (~E1000_CTRL_RFCE);
+		ctrl |= E1000_CTRL_TFCE;
+		break;
+	case e1000_fc_full:
+		ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+		break;
+	default:
+		DEBUGOUT("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_config_fc_after_link_up_generic - Configures flow control after link
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks the status of auto-negotiation after link up to ensure that the
+ *  speed and duplex were not forced.  If the link needed to be forced, then
+ *  flow control needs to be forced also.  If auto-negotiation is enabled
+ *  and did not fail, then we configure flow control based on our link
+ *  partner.
+ **/
+s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val = E1000_SUCCESS;
+	u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg;
+	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+	u16 speed, duplex;
+
+	DEBUGFUNC("e1000_config_fc_after_link_up_generic");
+
+	/* Check for the case where we have fiber media and auto-neg failed
+	 * so we had to force link.  In this case, we need to force the
+	 * configuration of the MAC to match the "fc" parameter.
+	 */
+	if (mac->autoneg_failed) {
+		if (hw->phy.media_type == e1000_media_type_fiber ||
+		    hw->phy.media_type == e1000_media_type_internal_serdes)
+			ret_val = e1000_force_mac_fc_generic(hw);
+	} else {
+		if (hw->phy.media_type == e1000_media_type_copper)
+			ret_val = e1000_force_mac_fc_generic(hw);
+	}
+
+	if (ret_val) {
+		DEBUGOUT("Error forcing flow control settings\n");
+		return ret_val;
+	}
+
+	/* Check for the case where we have copper media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) {
+		/* Read the MII Status Register and check to see if AutoNeg
+		 * has completed.  We read this twice because this reg has
+		 * some "sticky" (latched) bits.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
+			DEBUGOUT("Copper PHY and Auto Neg has not completed.\n");
+			return ret_val;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (Address 4) and the Auto_Negotiation Base
+		 * Page Ability Register (Address 5) to determine how
+		 * flow control was negotiated.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
+					       &mii_nway_adv_reg);
+		if (ret_val)
+			return ret_val;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
+					       &mii_nway_lp_ability_reg);
+		if (ret_val)
+			return ret_val;
+
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (Address 4) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (Address 5) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | e1000_fc_none
+		 *   0   |    1    |   0   |   DC    | e1000_fc_none
+		 *   0   |    1    |   1   |    0    | e1000_fc_none
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | e1000_fc_none
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *   1   |    1    |   0   |    0    | e1000_fc_none
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | E1000_fc_full
+		 *
+		 */
+		if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+		    (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+			/* Now we need to check if the user selected Rx ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise Rx
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == e1000_fc_full) {
+				hw->fc.current_mode = e1000_fc_full;
+				DEBUGOUT("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = e1000_fc_rx_pause;
+				DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+			}
+		}
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 */
+		else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			  (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			  (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			  (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_tx_pause;
+			DEBUGOUT("Flow Control = Tx PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+		} else {
+			/* Per the IEEE spec, at this point flow control
+			 * should be disabled.
+			 */
+			hw->fc.current_mode = e1000_fc_none;
+			DEBUGOUT("Flow Control = NONE.\n");
+		}
+
+		/* Now we need to do one last check...  If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex);
+		if (ret_val) {
+			DEBUGOUT("Error getting link speed and duplex\n");
+			return ret_val;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = e1000_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = e1000_force_mac_fc_generic(hw);
+		if (ret_val) {
+			DEBUGOUT("Error forcing flow control settings\n");
+			return ret_val;
+		}
+	}
+
+	/* Check for the case where we have SerDes media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->phy.media_type == e1000_media_type_internal_serdes) &&
+	    mac->autoneg) {
+		/* Read the PCS_LSTS and check to see if AutoNeg
+		 * has completed.
+		 */
+		pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT);
+
+		if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) {
+			DEBUGOUT("PCS Auto Neg has not completed.\n");
+			return ret_val;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (PCS_ANADV) and the Auto_Negotiation Base
+		 * Page Ability Register (PCS_LPAB) to determine how
+		 * flow control was negotiated.
+		 */
+		pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV);
+		pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB);
+
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (PCS_ANADV) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (PCS_LPAB) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | e1000_fc_none
+		 *   0   |    1    |   0   |   DC    | e1000_fc_none
+		 *   0   |    1    |   1   |    0    | e1000_fc_none
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | e1000_fc_none
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *   1   |    1    |   0   |    0    | e1000_fc_none
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *
+		 */
+		if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+		    (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) {
+			/* Now we need to check if the user selected Rx ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise Rx
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == e1000_fc_full) {
+				hw->fc.current_mode = e1000_fc_full;
+				DEBUGOUT("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = e1000_fc_rx_pause;
+				DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+			}
+		}
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 */
+		else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) &&
+			  (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+			  (pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+			  (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_tx_pause;
+			DEBUGOUT("Flow Control = Tx PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 */
+		else if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+			 !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			DEBUGOUT("Flow Control = Rx PAUSE frames only.\n");
+		} else {
+			/* Per the IEEE spec, at this point flow control
+			 * should be disabled.
+			 */
+			hw->fc.current_mode = e1000_fc_none;
+			DEBUGOUT("Flow Control = NONE.\n");
+		}
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
+		pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+		E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg);
+
+		ret_val = e1000_force_mac_fc_generic(hw);
+		if (ret_val) {
+			DEBUGOUT("Error forcing flow control settings\n");
+			return ret_val;
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Read the status register for the current speed/duplex and store the current
+ *  speed and duplex for copper connections.
+ **/
+s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
+					      u16 *duplex)
+{
+	u32 status;
+
+	DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic");
+
+	status = E1000_READ_REG(hw, E1000_STATUS);
+	if (status & E1000_STATUS_SPEED_1000) {
+		*speed = SPEED_1000;
+		DEBUGOUT("1000 Mbs, ");
+	} else if (status & E1000_STATUS_SPEED_100) {
+		*speed = SPEED_100;
+		DEBUGOUT("100 Mbs, ");
+	} else {
+		*speed = SPEED_10;
+		DEBUGOUT("10 Mbs, ");
+	}
+
+	if (status & E1000_STATUS_FD) {
+		*duplex = FULL_DUPLEX;
+		DEBUGOUT("Full Duplex\n");
+	} else {
+		*duplex = HALF_DUPLEX;
+		DEBUGOUT("Half Duplex\n");
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Sets the speed and duplex to gigabit full duplex (the only possible option)
+ *  for fiber/serdes links.
+ **/
+s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw,
+						    u16 *speed, u16 *duplex)
+{
+	DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic");
+
+	*speed = SPEED_1000;
+	*duplex = FULL_DUPLEX;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_hw_semaphore_generic - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM
+ **/
+s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	DEBUGFUNC("e1000_get_hw_semaphore_generic");
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		usec_delay(50);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+		return -E1000_ERR_NVM;
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		usec_delay(50);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_generic(hw);
+		DEBUGOUT("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_put_hw_semaphore_generic - Release hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Release hardware semaphore used to access the PHY or NVM
+ **/
+void e1000_put_hw_semaphore_generic(struct e1000_hw *hw)
+{
+	u32 swsm;
+
+	DEBUGFUNC("e1000_put_hw_semaphore_generic");
+
+	swsm = E1000_READ_REG(hw, E1000_SWSM);
+
+	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+
+	E1000_WRITE_REG(hw, E1000_SWSM, swsm);
+}
+
+/**
+ *  e1000_get_auto_rd_done_generic - Check for auto read completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Check EEPROM for Auto Read done bit.
+ **/
+s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw)
+{
+	s32 i = 0;
+
+	DEBUGFUNC("e1000_get_auto_rd_done_generic");
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD)
+			break;
+		msec_delay(1);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		DEBUGOUT("Auto read by HW from NVM has not completed.\n");
+		return -E1000_ERR_RESET;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_valid_led_default_generic - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_valid_led_default_generic");
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF)
+		*data = ID_LED_DEFAULT;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_id_led_init_generic -
+ *  @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000_id_led_init_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	const u32 ledctl_mask = 0x000000FF;
+	const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+	const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+	u16 data, i, temp;
+	const u16 led_mask = 0x0F;
+
+	DEBUGFUNC("e1000_id_led_init_generic");
+
+	ret_val = hw->nvm.ops.valid_led_default(hw, &data);
+	if (ret_val)
+		return ret_val;
+
+	mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL);
+	mac->ledctl_mode1 = mac->ledctl_default;
+	mac->ledctl_mode2 = mac->ledctl_default;
+
+	for (i = 0; i < 4; i++) {
+		temp = (data >> (i << 2)) & led_mask;
+		switch (temp) {
+		case ID_LED_ON1_DEF2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_ON1_OFF2:
+			mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode1 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_OFF1_DEF2:
+		case ID_LED_OFF1_ON2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode1 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+		switch (temp) {
+		case ID_LED_DEF1_ON2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_OFF1_ON2:
+			mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode2 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_DEF1_OFF2:
+		case ID_LED_ON1_OFF2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode2 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_setup_led_generic - Configures SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  This prepares the SW controllable LED for use and saves the current state
+ *  of the LED so it can be later restored.
+ **/
+s32 e1000_setup_led_generic(struct e1000_hw *hw)
+{
+	u32 ledctl;
+
+	DEBUGFUNC("e1000_setup_led_generic");
+
+	if (hw->mac.ops.setup_led != e1000_setup_led_generic)
+		return -E1000_ERR_CONFIG;
+
+	if (hw->phy.media_type == e1000_media_type_fiber) {
+		ledctl = E1000_READ_REG(hw, E1000_LEDCTL);
+		hw->mac.ledctl_default = ledctl;
+		/* Turn off LED0 */
+		ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK |
+			    E1000_LEDCTL_LED0_MODE_MASK);
+		ledctl |= (E1000_LEDCTL_MODE_LED_OFF <<
+			   E1000_LEDCTL_LED0_MODE_SHIFT);
+		E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl);
+	} else if (hw->phy.media_type == e1000_media_type_copper) {
+		E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_cleanup_led_generic - Set LED config to default operation
+ *  @hw: pointer to the HW structure
+ *
+ *  Remove the current LED configuration and set the LED configuration
+ *  to the default value, saved from the EEPROM.
+ **/
+s32 e1000_cleanup_led_generic(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_cleanup_led_generic");
+
+	E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default);
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_blink_led_generic - Blink LED
+ *  @hw: pointer to the HW structure
+ *
+ *  Blink the LEDs which are set to be on.
+ **/
+s32 e1000_blink_led_generic(struct e1000_hw *hw)
+{
+	u32 ledctl_blink = 0;
+	u32 i;
+
+	DEBUGFUNC("e1000_blink_led_generic");
+
+	if (hw->phy.media_type == e1000_media_type_fiber) {
+		/* always blink LED0 for PCI-E fiber */
+		ledctl_blink = E1000_LEDCTL_LED0_BLINK |
+		     (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT);
+	} else {
+		/* Set the blink bit for each LED that's "on" (0x0E)
+		 * (or "off" if inverted) in ledctl_mode2.  The blink
+		 * logic in hardware only works when mode is set to "on"
+		 * so it must be changed accordingly when the mode is
+		 * "off" and inverted.
+		 */
+		ledctl_blink = hw->mac.ledctl_mode2;
+		for (i = 0; i < 32; i += 8) {
+			u32 mode = (hw->mac.ledctl_mode2 >> i) &
+			    E1000_LEDCTL_LED0_MODE_MASK;
+			u32 led_default = hw->mac.ledctl_default >> i;
+
+			if ((!(led_default & E1000_LEDCTL_LED0_IVRT) &&
+			     (mode == E1000_LEDCTL_MODE_LED_ON)) ||
+			    ((led_default & E1000_LEDCTL_LED0_IVRT) &&
+			     (mode == E1000_LEDCTL_MODE_LED_OFF))) {
+				ledctl_blink &=
+				    ~(E1000_LEDCTL_LED0_MODE_MASK << i);
+				ledctl_blink |= (E1000_LEDCTL_LED0_BLINK |
+						 E1000_LEDCTL_MODE_LED_ON) << i;
+			}
+		}
+	}
+
+	E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_led_on_generic - Turn LED on
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn LED on.
+ **/
+s32 e1000_led_on_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	DEBUGFUNC("e1000_led_on_generic");
+
+	switch (hw->phy.media_type) {
+	case e1000_media_type_fiber:
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		ctrl &= ~E1000_CTRL_SWDPIN0;
+		ctrl |= E1000_CTRL_SWDPIO0;
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+		break;
+	case e1000_media_type_copper:
+		E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2);
+		break;
+	default:
+		break;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_led_off_generic - Turn LED off
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn LED off.
+ **/
+s32 e1000_led_off_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	DEBUGFUNC("e1000_led_off_generic");
+
+	switch (hw->phy.media_type) {
+	case e1000_media_type_fiber:
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		ctrl |= E1000_CTRL_SWDPIN0;
+		ctrl |= E1000_CTRL_SWDPIO0;
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+		break;
+	case e1000_media_type_copper:
+		E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1);
+		break;
+	default:
+		break;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities
+ *  @hw: pointer to the HW structure
+ *  @no_snoop: bitmap of snoop events
+ *
+ *  Set the PCI-express register to snoop for events enabled in 'no_snoop'.
+ **/
+void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop)
+{
+	u32 gcr;
+
+	DEBUGFUNC("e1000_set_pcie_no_snoop_generic");
+
+	if (no_snoop) {
+		gcr = E1000_READ_REG(hw, E1000_GCR);
+		gcr &= ~(PCIE_NO_SNOOP_ALL);
+		gcr |= no_snoop;
+		E1000_WRITE_REG(hw, E1000_GCR, gcr);
+	}
+}
+
+/**
+ *  e1000_disable_pcie_master_generic - Disables PCI-express master access
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns E1000_SUCCESS if successful, else returns -10
+ *  (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ *  the master requests to be disabled.
+ *
+ *  Disables PCI-Express master access and verifies there are no pending
+ *  requests.
+ **/
+s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+
+	DEBUGFUNC("e1000_disable_pcie_master_generic");
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+	ctrl |= E1000_CTRL_GIO_MASTER_DISABLE;
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+	while (timeout) {
+		if (!(E1000_READ_REG(hw, E1000_STATUS) &
+		      E1000_STATUS_GIO_MASTER_ENABLE))
+			break;
+		usec_delay(100);
+		timeout--;
+	}
+
+	if (!timeout) {
+		DEBUGOUT("Master requests are pending.\n");
+		return -E1000_ERR_MASTER_REQUESTS_PENDING;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing
+ *  @hw: pointer to the HW structure
+ *
+ *  Reset the Adaptive Interframe Spacing throttle to default values.
+ **/
+void e1000_reset_adaptive_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	DEBUGFUNC("e1000_reset_adaptive_generic");
+
+	if (!mac->adaptive_ifs) {
+		DEBUGOUT("Not in Adaptive IFS mode!\n");
+		return;
+	}
+
+	mac->current_ifs_val = 0;
+	mac->ifs_min_val = IFS_MIN;
+	mac->ifs_max_val = IFS_MAX;
+	mac->ifs_step_size = IFS_STEP;
+	mac->ifs_ratio = IFS_RATIO;
+
+	mac->in_ifs_mode = false;
+	E1000_WRITE_REG(hw, E1000_AIT, 0);
+}
+
+/**
+ *  e1000_update_adaptive_generic - Update Adaptive Interframe Spacing
+ *  @hw: pointer to the HW structure
+ *
+ *  Update the Adaptive Interframe Spacing Throttle value based on the
+ *  time between transmitted packets and time between collisions.
+ **/
+void e1000_update_adaptive_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	DEBUGFUNC("e1000_update_adaptive_generic");
+
+	if (!mac->adaptive_ifs) {
+		DEBUGOUT("Not in Adaptive IFS mode!\n");
+		return;
+	}
+
+	if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) {
+		if (mac->tx_packet_delta > MIN_NUM_XMITS) {
+			mac->in_ifs_mode = true;
+			if (mac->current_ifs_val < mac->ifs_max_val) {
+				if (!mac->current_ifs_val)
+					mac->current_ifs_val = mac->ifs_min_val;
+				else
+					mac->current_ifs_val +=
+						mac->ifs_step_size;
+				E1000_WRITE_REG(hw, E1000_AIT,
+						mac->current_ifs_val);
+			}
+		}
+	} else {
+		if (mac->in_ifs_mode &&
+		    (mac->tx_packet_delta <= MIN_NUM_XMITS)) {
+			mac->current_ifs_val = 0;
+			mac->in_ifs_mode = false;
+			E1000_WRITE_REG(hw, E1000_AIT, 0);
+		}
+	}
+}
+
+/**
+ *  e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Verify that when not using auto-negotiation that MDI/MDIx is correctly
+ *  set, which is forced to MDI mode only.
+ **/
+static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_validate_mdi_setting_generic");
+
+	if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) {
+		DEBUGOUT("Invalid MDI setting detected\n");
+		hw->phy.mdix = 1;
+		return -E1000_ERR_CONFIG;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Validate the MDI/MDIx setting, allowing for auto-crossover during forced
+ *  operation.
+ **/
+s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic");
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register
+ *  @hw: pointer to the HW structure
+ *  @reg: 32bit register offset such as E1000_SCTL
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes an address/data control type register.  There are several of these
+ *  and they all have the format address << 8 | data and bit 31 is polled for
+ *  completion.
+ **/
+s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
+				      u32 offset, u8 data)
+{
+	u32 i, regvalue = 0;
+
+	DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic");
+
+	/* Set up the address and data */
+	regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT);
+	E1000_WRITE_REG(hw, reg, regvalue);
+
+	/* Poll the ready bit to see if the MDI read completed */
+	for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) {
+		usec_delay(5);
+		regvalue = E1000_READ_REG(hw, reg);
+		if (regvalue & E1000_GEN_CTL_READY)
+			break;
+	}
+	if (!(regvalue & E1000_GEN_CTL_READY)) {
+		DEBUGOUT1("Reg %08x did not indicate ready\n", reg);
+		return -E1000_ERR_PHY;
+	}
+
+	return E1000_SUCCESS;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
new file mode 100644
index 00000000..6a1b0f52
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
@@ -0,0 +1,80 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_MAC_H_
+#define _E1000_MAC_H_
+
+void e1000_init_mac_ops_generic(struct e1000_hw *hw);
+void e1000_null_mac_generic(struct e1000_hw *hw);
+s32  e1000_null_ops_generic(struct e1000_hw *hw);
+s32  e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d);
+bool e1000_null_mng_mode(struct e1000_hw *hw);
+void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a);
+void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b);
+void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a);
+s32  e1000_blink_led_generic(struct e1000_hw *hw);
+s32  e1000_check_for_copper_link_generic(struct e1000_hw *hw);
+s32  e1000_check_for_fiber_link_generic(struct e1000_hw *hw);
+s32  e1000_check_for_serdes_link_generic(struct e1000_hw *hw);
+s32  e1000_cleanup_led_generic(struct e1000_hw *hw);
+s32  e1000_config_fc_after_link_up_generic(struct e1000_hw *hw);
+s32  e1000_disable_pcie_master_generic(struct e1000_hw *hw);
+s32  e1000_force_mac_fc_generic(struct e1000_hw *hw);
+s32  e1000_get_auto_rd_done_generic(struct e1000_hw *hw);
+s32  e1000_get_bus_info_pcie_generic(struct e1000_hw *hw);
+void e1000_set_lan_id_single_port(struct e1000_hw *hw);
+s32  e1000_get_hw_semaphore_generic(struct e1000_hw *hw);
+s32  e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
+					       u16 *duplex);
+s32  e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw,
+						     u16 *speed, u16 *duplex);
+s32  e1000_id_led_init_generic(struct e1000_hw *hw);
+s32  e1000_led_on_generic(struct e1000_hw *hw);
+s32  e1000_led_off_generic(struct e1000_hw *hw);
+void e1000_update_mc_addr_list_generic(struct e1000_hw *hw,
+				       u8 *mc_addr_list, u32 mc_addr_count);
+s32  e1000_set_fc_watermarks_generic(struct e1000_hw *hw);
+s32  e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw);
+s32  e1000_setup_led_generic(struct e1000_hw *hw);
+s32  e1000_setup_link_generic(struct e1000_hw *hw);
+s32  e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw);
+s32  e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
+				       u32 offset, u8 data);
+
+u32  e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr);
+
+void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw);
+void e1000_clear_vfta_generic(struct e1000_hw *hw);
+void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count);
+void e1000_put_hw_semaphore_generic(struct e1000_hw *hw);
+s32  e1000_check_alt_mac_addr_generic(struct e1000_hw *hw);
+void e1000_reset_adaptive_generic(struct e1000_hw *hw);
+void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop);
+void e1000_update_adaptive_generic(struct e1000_hw *hw);
+void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value);
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
new file mode 100644
index 00000000..a1700398
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
@@ -0,0 +1,554 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+/**
+ *  e1000_calculate_checksum - Calculate checksum for buffer
+ *  @buffer: pointer to EEPROM
+ *  @length: size of EEPROM to calculate a checksum for
+ *
+ *  Calculates the checksum for some buffer on a specified length.  The
+ *  checksum calculated is returned.
+ **/
+u8 e1000_calculate_checksum(u8 *buffer, u32 length)
+{
+	u32 i;
+	u8 sum = 0;
+
+	DEBUGFUNC("e1000_calculate_checksum");
+
+	if (!buffer)
+		return 0;
+
+	for (i = 0; i < length; i++)
+		sum += buffer[i];
+
+	return (u8) (0 - sum);
+}
+
+/**
+ *  e1000_mng_enable_host_if_generic - Checks host interface is enabled
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ *
+ *  This function checks whether the HOST IF is enabled for command operation
+ *  and also checks whether the previous command is completed.  It busy waits
+ *  in case of previous command is not completed.
+ **/
+s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw)
+{
+	u32 hicr;
+	u8 i;
+
+	DEBUGFUNC("e1000_mng_enable_host_if_generic");
+
+	if (!hw->mac.arc_subsystem_valid) {
+		DEBUGOUT("ARC subsystem not valid.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Check that the host interface is enabled. */
+	hicr = E1000_READ_REG(hw, E1000_HICR);
+	if (!(hicr & E1000_HICR_EN)) {
+		DEBUGOUT("E1000_HOST_EN bit disabled.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+	/* check the previous command is completed */
+	for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) {
+		hicr = E1000_READ_REG(hw, E1000_HICR);
+		if (!(hicr & E1000_HICR_C))
+			break;
+		msec_delay_irq(1);
+	}
+
+	if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
+		DEBUGOUT("Previous command timeout failed .\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_check_mng_mode_generic - Generic check management mode
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the firmware semaphore register and returns true (>0) if
+ *  manageability is enabled, else false (0).
+ **/
+bool e1000_check_mng_mode_generic(struct e1000_hw *hw)
+{
+	u32 fwsm = E1000_READ_REG(hw, E1000_FWSM);
+
+	DEBUGFUNC("e1000_check_mng_mode_generic");
+
+
+	return (fwsm & E1000_FWSM_MODE_MASK) ==
+		(E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT);
+}
+
+/**
+ *  e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on Tx
+ *  @hw: pointer to the HW structure
+ *
+ *  Enables packet filtering on transmit packets if manageability is enabled
+ *  and host interface is enabled.
+ **/
+bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw)
+{
+	struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie;
+	u32 *buffer = (u32 *)&hw->mng_cookie;
+	u32 offset;
+	s32 ret_val, hdr_csum, csum;
+	u8 i, len;
+
+	DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic");
+
+	hw->mac.tx_pkt_filtering = true;
+
+	/* No manageability, no filtering */
+	if (!hw->mac.ops.check_mng_mode(hw)) {
+		hw->mac.tx_pkt_filtering = false;
+		return hw->mac.tx_pkt_filtering;
+	}
+
+	/* If we can't read from the host interface for whatever
+	 * reason, disable filtering.
+	 */
+	ret_val = e1000_mng_enable_host_if_generic(hw);
+	if (ret_val != E1000_SUCCESS) {
+		hw->mac.tx_pkt_filtering = false;
+		return hw->mac.tx_pkt_filtering;
+	}
+
+	/* Read in the header.  Length and offset are in dwords. */
+	len    = E1000_MNG_DHCP_COOKIE_LENGTH >> 2;
+	offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2;
+	for (i = 0; i < len; i++)
+		*(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF,
+							   offset + i);
+	hdr_csum = hdr->checksum;
+	hdr->checksum = 0;
+	csum = e1000_calculate_checksum((u8 *)hdr,
+					E1000_MNG_DHCP_COOKIE_LENGTH);
+	/* If either the checksums or signature don't match, then
+	 * the cookie area isn't considered valid, in which case we
+	 * take the safe route of assuming Tx filtering is enabled.
+	 */
+	if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) {
+		hw->mac.tx_pkt_filtering = true;
+		return hw->mac.tx_pkt_filtering;
+	}
+
+	/* Cookie area is valid, make the final check for filtering. */
+	if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING))
+		hw->mac.tx_pkt_filtering = false;
+
+	return hw->mac.tx_pkt_filtering;
+}
+
+/**
+ *  e1000_mng_write_cmd_header_generic - Writes manageability command header
+ *  @hw: pointer to the HW structure
+ *  @hdr: pointer to the host interface command header
+ *
+ *  Writes the command header after does the checksum calculation.
+ **/
+s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw,
+				      struct e1000_host_mng_command_header *hdr)
+{
+	u16 i, length = sizeof(struct e1000_host_mng_command_header);
+
+	DEBUGFUNC("e1000_mng_write_cmd_header_generic");
+
+	/* Write the whole command header structure with new checksum. */
+
+	hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length);
+
+	length >>= 2;
+	/* Write the relevant command block into the ram area. */
+	for (i = 0; i < length; i++) {
+		E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i,
+					    *((u32 *) hdr + i));
+		E1000_WRITE_FLUSH(hw);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_mng_host_if_write_generic - Write to the manageability host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: pointer to the host interface buffer
+ *  @length: size of the buffer
+ *  @offset: location in the buffer to write to
+ *  @sum: sum of the data (not checksum)
+ *
+ *  This function writes the buffer content at the offset given on the host if.
+ *  It also does alignment considerations to do the writes in most efficient
+ *  way.  Also fills up the sum of the buffer in *buffer parameter.
+ **/
+s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer,
+				    u16 length, u16 offset, u8 *sum)
+{
+	u8 *tmp;
+	u8 *bufptr = buffer;
+	u32 data = 0;
+	u16 remaining, i, j, prev_bytes;
+
+	DEBUGFUNC("e1000_mng_host_if_write_generic");
+
+	/* sum = only sum of the data and it is not checksum */
+
+	if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH)
+		return -E1000_ERR_PARAM;
+
+	tmp = (u8 *)&data;
+	prev_bytes = offset & 0x3;
+	offset >>= 2;
+
+	if (prev_bytes) {
+		data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset);
+		for (j = prev_bytes; j < sizeof(u32); j++) {
+			*(tmp + j) = *bufptr++;
+			*sum += *(tmp + j);
+		}
+		E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data);
+		length -= j - prev_bytes;
+		offset++;
+	}
+
+	remaining = length & 0x3;
+	length -= remaining;
+
+	/* Calculate length in DWORDs */
+	length >>= 2;
+
+	/* The device driver writes the relevant command block into the
+	 * ram area.
+	 */
+	for (i = 0; i < length; i++) {
+		for (j = 0; j < sizeof(u32); j++) {
+			*(tmp + j) = *bufptr++;
+			*sum += *(tmp + j);
+		}
+
+		E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i,
+					    data);
+	}
+	if (remaining) {
+		for (j = 0; j < sizeof(u32); j++) {
+			if (j < remaining)
+				*(tmp + j) = *bufptr++;
+			else
+				*(tmp + j) = 0;
+
+			*sum += *(tmp + j);
+		}
+		E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i,
+					    data);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: pointer to the host interface
+ *  @length: size of the buffer
+ *
+ *  Writes the DHCP information to the host interface.
+ **/
+s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, u8 *buffer,
+				      u16 length)
+{
+	struct e1000_host_mng_command_header hdr;
+	s32 ret_val;
+	u32 hicr;
+
+	DEBUGFUNC("e1000_mng_write_dhcp_info_generic");
+
+	hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD;
+	hdr.command_length = length;
+	hdr.reserved1 = 0;
+	hdr.reserved2 = 0;
+	hdr.checksum = 0;
+
+	/* Enable the host interface */
+	ret_val = e1000_mng_enable_host_if_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Populate the host interface with the contents of "buffer". */
+	ret_val = e1000_mng_host_if_write_generic(hw, buffer, length,
+						  sizeof(hdr), &(hdr.checksum));
+	if (ret_val)
+		return ret_val;
+
+	/* Write the manageability command header */
+	ret_val = e1000_mng_write_cmd_header_generic(hw, &hdr);
+	if (ret_val)
+		return ret_val;
+
+	/* Tell the ARC a new command is pending. */
+	hicr = E1000_READ_REG(hw, E1000_HICR);
+	E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_enable_mng_pass_thru - Check if management passthrough is needed
+ *  @hw: pointer to the HW structure
+ *
+ *  Verifies the hardware needs to leave interface enabled so that frames can
+ *  be directed to and from the management interface.
+ **/
+bool e1000_enable_mng_pass_thru(struct e1000_hw *hw)
+{
+	u32 manc;
+	u32 fwsm, factps;
+
+	DEBUGFUNC("e1000_enable_mng_pass_thru");
+
+	if (!hw->mac.asf_firmware_present)
+		return false;
+
+	manc = E1000_READ_REG(hw, E1000_MANC);
+
+	if (!(manc & E1000_MANC_RCV_TCO_EN))
+		return false;
+
+	if (hw->mac.has_fwsm) {
+		fwsm = E1000_READ_REG(hw, E1000_FWSM);
+		factps = E1000_READ_REG(hw, E1000_FACTPS);
+
+		if (!(factps & E1000_FACTPS_MNGCG) &&
+		    ((fwsm & E1000_FWSM_MODE_MASK) ==
+		     (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)))
+			return true;
+	} else if ((manc & E1000_MANC_SMBUS_EN) &&
+		   !(manc & E1000_MANC_ASF_EN)) {
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ *  e1000_host_interface_command - Writes buffer to host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: contains a command to write
+ *  @length: the byte length of the buffer, must be multiple of 4 bytes
+ *
+ *  Writes a buffer to the Host Interface.  Upon success, returns E1000_SUCCESS
+ *  else returns E1000_ERR_HOST_INTERFACE_COMMAND.
+ **/
+s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length)
+{
+	u32 hicr, i;
+
+	DEBUGFUNC("e1000_host_interface_command");
+
+	if (!(hw->mac.arc_subsystem_valid)) {
+		DEBUGOUT("Hardware doesn't support host interface command.\n");
+		return E1000_SUCCESS;
+	}
+
+	if (!hw->mac.asf_firmware_present) {
+		DEBUGOUT("Firmware is not present.\n");
+		return E1000_SUCCESS;
+	}
+
+	if (length == 0 || length & 0x3 ||
+	    length > E1000_HI_MAX_BLOCK_BYTE_LENGTH) {
+		DEBUGOUT("Buffer length failure.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Check that the host interface is enabled. */
+	hicr = E1000_READ_REG(hw, E1000_HICR);
+	if (!(hicr & E1000_HICR_EN)) {
+		DEBUGOUT("E1000_HOST_EN bit disabled.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Calculate length in DWORDs */
+	length >>= 2;
+
+	/* The device driver writes the relevant command block
+	 * into the ram area.
+	 */
+	for (i = 0; i < length; i++)
+		E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i,
+					    *((u32 *)buffer + i));
+
+	/* Setting this bit tells the ARC that a new command is pending. */
+	E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
+
+	for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
+		hicr = E1000_READ_REG(hw, E1000_HICR);
+		if (!(hicr & E1000_HICR_C))
+			break;
+		msec_delay(1);
+	}
+
+	/* Check command successful completion. */
+	if (i == E1000_HI_COMMAND_TIMEOUT ||
+	    (!(E1000_READ_REG(hw, E1000_HICR) & E1000_HICR_SV))) {
+		DEBUGOUT("Command has failed with no status valid.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	for (i = 0; i < length; i++)
+		*((u32 *)buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw,
+								  E1000_HOST_IF,
+								  i);
+
+	return E1000_SUCCESS;
+}
+/**
+ *  e1000_load_firmware - Writes proxy FW code buffer to host interface
+ *                        and execute.
+ *  @hw: pointer to the HW structure
+ *  @buffer: contains a firmware to write
+ *  @length: the byte length of the buffer, must be multiple of 4 bytes
+ *
+ *  Upon success returns E1000_SUCCESS, returns E1000_ERR_CONFIG if not enabled
+ *  in HW else returns E1000_ERR_HOST_INTERFACE_COMMAND.
+ **/
+s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length)
+{
+	u32 hicr, hibba, fwsm, icr, i;
+
+	DEBUGFUNC("e1000_load_firmware");
+
+	if (hw->mac.type < e1000_i210) {
+		DEBUGOUT("Hardware doesn't support loading FW by the driver\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	/* Check that the host interface is enabled. */
+	hicr = E1000_READ_REG(hw, E1000_HICR);
+	if (!(hicr & E1000_HICR_EN)) {
+		DEBUGOUT("E1000_HOST_EN bit disabled.\n");
+		return -E1000_ERR_CONFIG;
+	}
+	if (!(hicr & E1000_HICR_MEMORY_BASE_EN)) {
+		DEBUGOUT("E1000_HICR_MEMORY_BASE_EN bit disabled.\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	if (length == 0 || length & 0x3 || length > E1000_HI_FW_MAX_LENGTH) {
+		DEBUGOUT("Buffer length failure.\n");
+		return -E1000_ERR_INVALID_ARGUMENT;
+	}
+
+	/* Clear notification from ROM-FW by reading ICR register */
+	icr = E1000_READ_REG(hw, E1000_ICR_V2);
+
+	/* Reset ROM-FW */
+	hicr = E1000_READ_REG(hw, E1000_HICR);
+	hicr |= E1000_HICR_FW_RESET_ENABLE;
+	E1000_WRITE_REG(hw, E1000_HICR, hicr);
+	hicr |= E1000_HICR_FW_RESET;
+	E1000_WRITE_REG(hw, E1000_HICR, hicr);
+	E1000_WRITE_FLUSH(hw);
+
+	/* Wait till MAC notifies about its readiness after ROM-FW reset */
+	for (i = 0; i < (E1000_HI_COMMAND_TIMEOUT * 2); i++) {
+		icr = E1000_READ_REG(hw, E1000_ICR_V2);
+		if (icr & E1000_ICR_MNG)
+			break;
+		msec_delay(1);
+	}
+
+	/* Check for timeout */
+	if (i == E1000_HI_COMMAND_TIMEOUT) {
+		DEBUGOUT("FW reset failed.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Wait till MAC is ready to accept new FW code */
+	for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
+		fwsm = E1000_READ_REG(hw, E1000_FWSM);
+		if ((fwsm & E1000_FWSM_FW_VALID) &&
+		    ((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT ==
+		    E1000_FWSM_HI_EN_ONLY_MODE))
+			break;
+		msec_delay(1);
+	}
+
+	/* Check for timeout */
+	if (i == E1000_HI_COMMAND_TIMEOUT) {
+		DEBUGOUT("FW reset failed.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Calculate length in DWORDs */
+	length >>= 2;
+
+	/* The device driver writes the relevant FW code block
+	 * into the ram area in DWORDs via 1kB ram addressing window.
+	 */
+	for (i = 0; i < length; i++) {
+		if (!(i % E1000_HI_FW_BLOCK_DWORD_LENGTH)) {
+			/* Point to correct 1kB ram window */
+			hibba = E1000_HI_FW_BASE_ADDRESS +
+				((E1000_HI_FW_BLOCK_DWORD_LENGTH << 2) *
+				(i / E1000_HI_FW_BLOCK_DWORD_LENGTH));
+
+			E1000_WRITE_REG(hw, E1000_HIBBA, hibba);
+		}
+
+		E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF,
+					    i % E1000_HI_FW_BLOCK_DWORD_LENGTH,
+					    *((u32 *)buffer + i));
+	}
+
+	/* Setting this bit tells the ARC that a new FW is ready to execute. */
+	hicr = E1000_READ_REG(hw, E1000_HICR);
+	E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C);
+
+	for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) {
+		hicr = E1000_READ_REG(hw, E1000_HICR);
+		if (!(hicr & E1000_HICR_C))
+			break;
+		msec_delay(1);
+	}
+
+	/* Check for successful FW start. */
+	if (i == E1000_HI_COMMAND_TIMEOUT) {
+		DEBUGOUT("New FW did not start within timeout period.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	return E1000_SUCCESS;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
new file mode 100644
index 00000000..c94b2185
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
@@ -0,0 +1,89 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_MANAGE_H_
+#define _E1000_MANAGE_H_
+
+bool e1000_check_mng_mode_generic(struct e1000_hw *hw);
+bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw);
+s32  e1000_mng_enable_host_if_generic(struct e1000_hw *hw);
+s32  e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer,
+				     u16 length, u16 offset, u8 *sum);
+s32  e1000_mng_write_cmd_header_generic(struct e1000_hw *hw,
+				     struct e1000_host_mng_command_header *hdr);
+s32  e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw,
+				       u8 *buffer, u16 length);
+bool e1000_enable_mng_pass_thru(struct e1000_hw *hw);
+u8 e1000_calculate_checksum(u8 *buffer, u32 length);
+s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length);
+s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length);
+
+enum e1000_mng_mode {
+	e1000_mng_mode_none = 0,
+	e1000_mng_mode_asf,
+	e1000_mng_mode_pt,
+	e1000_mng_mode_ipmi,
+	e1000_mng_mode_host_if_only
+};
+
+#define E1000_FACTPS_MNGCG			0x20000000
+
+#define E1000_FWSM_MODE_MASK			0xE
+#define E1000_FWSM_MODE_SHIFT			1
+#define E1000_FWSM_FW_VALID			0x00008000
+#define E1000_FWSM_HI_EN_ONLY_MODE		0x4
+
+#define E1000_MNG_IAMT_MODE			0x3
+#define E1000_MNG_DHCP_COOKIE_LENGTH		0x10
+#define E1000_MNG_DHCP_COOKIE_OFFSET		0x6F0
+#define E1000_MNG_DHCP_COMMAND_TIMEOUT		10
+#define E1000_MNG_DHCP_TX_PAYLOAD_CMD		64
+#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING	0x1
+#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN	0x2
+
+#define E1000_VFTA_ENTRY_SHIFT			5
+#define E1000_VFTA_ENTRY_MASK			0x7F
+#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK		0x1F
+
+#define E1000_HI_MAX_BLOCK_BYTE_LENGTH		1792 /* Num of bytes in range */
+#define E1000_HI_MAX_BLOCK_DWORD_LENGTH		448 /* Num of dwords in range */
+#define E1000_HI_COMMAND_TIMEOUT		500 /* Process HI cmd limit */
+#define E1000_HI_FW_BASE_ADDRESS		0x10000
+#define E1000_HI_FW_MAX_LENGTH			(64 * 1024) /* Num of bytes */
+#define E1000_HI_FW_BLOCK_DWORD_LENGTH		256 /* Num of DWORDs per page */
+#define E1000_HICR_MEMORY_BASE_EN		0x200 /* MB Enable bit - RO */
+#define E1000_HICR_EN			0x01  /* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define E1000_HICR_C			0x02
+#define E1000_HICR_SV			0x04  /* Status Validity */
+#define E1000_HICR_FW_RESET_ENABLE	0x40
+#define E1000_HICR_FW_RESET		0x80
+
+/* Intel(R) Active Management Technology signature */
+#define E1000_IAMT_SIGNATURE		0x544D4149
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
new file mode 100644
index 00000000..3ef0d98b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
@@ -0,0 +1,525 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_mbx.h"
+
+/**
+ *  e1000_null_mbx_check_for_flag - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_null_mbx_check_for_flag(struct e1000_hw E1000_UNUSEDARG *hw,
+					 u16 E1000_UNUSEDARG mbx_id)
+{
+	DEBUGFUNC("e1000_null_mbx_check_flag");
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_mbx_transact - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw,
+				   u32 E1000_UNUSEDARG *msg,
+				   u16 E1000_UNUSEDARG size,
+				   u16 E1000_UNUSEDARG mbx_id)
+{
+	DEBUGFUNC("e1000_null_mbx_rw_msg");
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_mbx - Reads a message from the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to read
+ *
+ *  returns SUCCESS if it successfully read message from buffer
+ **/
+s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_read_mbx");
+
+	/* limit read to size of mailbox */
+	if (size > mbx->size)
+		size = mbx->size;
+
+	if (mbx->ops.read)
+		ret_val = mbx->ops.read(hw, msg, size, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_mbx - Write a message to the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+s32 e1000_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_write_mbx");
+
+	if (size > mbx->size)
+		ret_val = -E1000_ERR_MBX;
+
+	else if (mbx->ops.write)
+		ret_val = mbx->ops.write(hw, msg, size, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_msg - checks to see if someone sent us mail
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 e1000_check_for_msg(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_check_for_msg");
+
+	if (mbx->ops.check_for_msg)
+		ret_val = mbx->ops.check_for_msg(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_ack - checks to see if someone sent us ACK
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 e1000_check_for_ack(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_check_for_ack");
+
+	if (mbx->ops.check_for_ack)
+		ret_val = mbx->ops.check_for_ack(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_rst - checks to see if other side has reset
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 e1000_check_for_rst(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_check_for_rst");
+
+	if (mbx->ops.check_for_rst)
+		ret_val = mbx->ops.check_for_rst(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_poll_for_msg - Wait for message notification
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message notification
+ **/
+static s32 e1000_poll_for_msg(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	DEBUGFUNC("e1000_poll_for_msg");
+
+	if (!countdown || !mbx->ops.check_for_msg)
+		goto out;
+
+	while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) {
+		countdown--;
+		if (!countdown)
+			break;
+		usec_delay(mbx->usec_delay);
+	}
+
+	/* if we failed, all future posted messages fail until reset */
+	if (!countdown)
+		mbx->timeout = 0;
+out:
+	return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
+}
+
+/**
+ *  e1000_poll_for_ack - Wait for message acknowledgement
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message acknowledgement
+ **/
+static s32 e1000_poll_for_ack(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	DEBUGFUNC("e1000_poll_for_ack");
+
+	if (!countdown || !mbx->ops.check_for_ack)
+		goto out;
+
+	while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) {
+		countdown--;
+		if (!countdown)
+			break;
+		usec_delay(mbx->usec_delay);
+	}
+
+	/* if we failed, all future posted messages fail until reset */
+	if (!countdown)
+		mbx->timeout = 0;
+out:
+	return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
+}
+
+/**
+ *  e1000_read_posted_mbx - Wait for message notification and receive message
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message notification and
+ *  copied it into the receive buffer.
+ **/
+s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_read_posted_mbx");
+
+	if (!mbx->ops.read)
+		goto out;
+
+	ret_val = e1000_poll_for_msg(hw, mbx_id);
+
+	/* if ack received read message, otherwise we timed out */
+	if (!ret_val)
+		ret_val = mbx->ops.read(hw, msg, size, mbx_id);
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_write_posted_mbx - Write a message to the mailbox, wait for ack
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer and
+ *  received an ack to that message within delay * timeout period
+ **/
+s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_write_posted_mbx");
+
+	/* exit if either we can't write or there isn't a defined timeout */
+	if (!mbx->ops.write || !mbx->timeout)
+		goto out;
+
+	/* send msg */
+	ret_val = mbx->ops.write(hw, msg, size, mbx_id);
+
+	/* if msg sent wait until we receive an ack */
+	if (!ret_val)
+		ret_val = e1000_poll_for_ack(hw, mbx_id);
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_init_mbx_ops_generic - Initialize mbx function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets the function pointers to no-op functions
+ **/
+void e1000_init_mbx_ops_generic(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	mbx->ops.init_params = e1000_null_ops_generic;
+	mbx->ops.read = e1000_null_mbx_transact;
+	mbx->ops.write = e1000_null_mbx_transact;
+	mbx->ops.check_for_msg = e1000_null_mbx_check_for_flag;
+	mbx->ops.check_for_ack = e1000_null_mbx_check_for_flag;
+	mbx->ops.check_for_rst = e1000_null_mbx_check_for_flag;
+	mbx->ops.read_posted = e1000_read_posted_mbx;
+	mbx->ops.write_posted = e1000_write_posted_mbx;
+}
+
+static s32 e1000_check_for_bit_pf(struct e1000_hw *hw, u32 mask)
+{
+	u32 mbvficr = E1000_READ_REG(hw, E1000_MBVFICR);
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (mbvficr & mask) {
+		ret_val = E1000_SUCCESS;
+		E1000_WRITE_REG(hw, E1000_MBVFICR, mask);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_msg_pf - checks to see if the VF has sent mail
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_check_for_msg_pf");
+
+	if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) {
+		ret_val = E1000_SUCCESS;
+		hw->mbx.stats.reqs++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_ack_pf - checks to see if the VF has ACKed
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_check_for_ack_pf");
+
+	if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) {
+		ret_val = E1000_SUCCESS;
+		hw->mbx.stats.acks++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_rst_pf - checks to see if the VF has reset
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	u32 vflre = E1000_READ_REG(hw, E1000_VFLRE);
+	s32 ret_val = -E1000_ERR_MBX;
+
+	DEBUGFUNC("e1000_check_for_rst_pf");
+
+	if (vflre & (1 << vf_number)) {
+		ret_val = E1000_SUCCESS;
+		E1000_WRITE_REG(hw, E1000_VFLRE, (1 << vf_number));
+		hw->mbx.stats.rsts++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_obtain_mbx_lock_pf - obtain mailbox lock
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  return SUCCESS if we obtained the mailbox lock
+ **/
+static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+	u32 p2v_mailbox;
+
+	DEBUGFUNC("e1000_obtain_mbx_lock_pf");
+
+	/* Take ownership of the buffer */
+	E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU);
+
+	/* reserve mailbox for vf use */
+	p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number));
+	if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
+		ret_val = E1000_SUCCESS;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_mbx_pf - Places a message in the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+static s32 e1000_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
+			      u16 vf_number)
+{
+	s32 ret_val;
+	u16 i;
+
+	DEBUGFUNC("e1000_write_mbx_pf");
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number);
+	if (ret_val)
+		goto out_no_write;
+
+	/* flush msg and acks as we are overwriting the message buffer */
+	e1000_check_for_msg_pf(hw, vf_number);
+	e1000_check_for_ack_pf(hw, vf_number);
+
+	/* copy the caller specified message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i, msg[i]);
+
+	/* Interrupt VF to tell it a message has been sent and release buffer*/
+	E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS);
+
+	/* update stats */
+	hw->mbx.stats.msgs_tx++;
+
+out_no_write:
+	return ret_val;
+
+}
+
+/**
+ *  e1000_read_mbx_pf - Read a message from the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @vf_number: the VF index
+ *
+ *  This function copies a message from the mailbox buffer to the caller's
+ *  memory buffer.  The presumption is that the caller knows that there was
+ *  a message due to a VF request so no polling for message is needed.
+ **/
+static s32 e1000_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
+			     u16 vf_number)
+{
+	s32 ret_val;
+	u16 i;
+
+	DEBUGFUNC("e1000_read_mbx_pf");
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number);
+	if (ret_val)
+		goto out_no_read;
+
+	/* copy the message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		msg[i] = E1000_READ_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i);
+
+	/* Acknowledge the message and release buffer */
+	E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK);
+
+	/* update stats */
+	hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+	return ret_val;
+}
+
+/**
+ *  e1000_init_mbx_params_pf - set initial values for pf mailbox
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes the hw->mbx struct to correct values for pf mailbox
+ */
+s32 e1000_init_mbx_params_pf(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+	case e1000_i350:
+	case e1000_i354:
+		mbx->timeout = 0;
+		mbx->usec_delay = 0;
+
+		mbx->size = E1000_VFMAILBOX_SIZE;
+
+		mbx->ops.read = e1000_read_mbx_pf;
+		mbx->ops.write = e1000_write_mbx_pf;
+		mbx->ops.read_posted = e1000_read_posted_mbx;
+		mbx->ops.write_posted = e1000_write_posted_mbx;
+		mbx->ops.check_for_msg = e1000_check_for_msg_pf;
+		mbx->ops.check_for_ack = e1000_check_for_ack_pf;
+		mbx->ops.check_for_rst = e1000_check_for_rst_pf;
+
+		mbx->stats.msgs_tx = 0;
+		mbx->stats.msgs_rx = 0;
+		mbx->stats.reqs = 0;
+		mbx->stats.acks = 0;
+		mbx->stats.rsts = 0;
+	default:
+		return E1000_SUCCESS;
+	}
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
new file mode 100644
index 00000000..bbf838c8
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
@@ -0,0 +1,87 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_MBX_H_
+#define _E1000_MBX_H_
+
+#include "e1000_api.h"
+
+#define E1000_P2VMAILBOX_STS	0x00000001 /* Initiate message send to VF */
+#define E1000_P2VMAILBOX_ACK	0x00000002 /* Ack message recv'd from VF */
+#define E1000_P2VMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_RVFU	0x00000010 /* Reset VFU - used when VF stuck */
+
+#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */
+#define E1000_MBVFICR_VFREQ_VF1	0x00000001 /* bit for VF 1 message */
+#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */
+#define E1000_MBVFICR_VFACK_VF1	0x00010000 /* bit for VF 1 ack */
+
+#define E1000_VFMAILBOX_SIZE	16 /* 16 32 bit words - 64 bytes */
+
+/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is E1000_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+/* Msgs below or'd with this are the ACK */
+#define E1000_VT_MSGTYPE_ACK	0x80000000
+/* Msgs below or'd with this are the NACK */
+#define E1000_VT_MSGTYPE_NACK	0x40000000
+/* Indicates that VF is still clear to send requests */
+#define E1000_VT_MSGTYPE_CTS	0x20000000
+#define E1000_VT_MSGINFO_SHIFT	16
+/* bits 23:16 are used for extra info for certain messages */
+#define E1000_VT_MSGINFO_MASK	(0xFF << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_VF_RESET			0x01 /* VF requests reset */
+#define E1000_VF_SET_MAC_ADDR		0x02 /* VF requests to set MAC addr */
+#define E1000_VF_SET_MULTICAST		0x03 /* VF requests to set MC addr */
+#define E1000_VF_SET_MULTICAST_COUNT_MASK (0x1F << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_MULTICAST_OVERFLOW	(0x80 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_VLAN		0x04 /* VF requests to set VLAN */
+#define E1000_VF_SET_VLAN_ADD		(0x01 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_LPE		0x05 /* reqs to set VMOLR.LPE */
+#define E1000_VF_SET_PROMISC		0x06 /* reqs to clear VMOLR.ROPE/MPME*/
+#define E1000_VF_SET_PROMISC_UNICAST	(0x01 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_PROMISC_MULTICAST	(0x02 << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_PF_CONTROL_MSG		0x0100 /* PF control message */
+
+#define E1000_VF_MBX_INIT_TIMEOUT	2000 /* number of retries on mailbox */
+#define E1000_VF_MBX_INIT_DELAY		500  /* microseconds between retries */
+
+s32 e1000_read_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_write_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_read_posted_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_write_posted_mbx(struct e1000_hw *, u32 *, u16, u16);
+s32 e1000_check_for_msg(struct e1000_hw *, u16);
+s32 e1000_check_for_ack(struct e1000_hw *, u16);
+s32 e1000_check_for_rst(struct e1000_hw *, u16);
+void e1000_init_mbx_ops_generic(struct e1000_hw *hw);
+s32 e1000_init_mbx_params_pf(struct e1000_hw *);
+
+#endif /* _E1000_MBX_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
new file mode 100644
index 00000000..6188d007
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
@@ -0,0 +1,965 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+static void e1000_reload_nvm_generic(struct e1000_hw *hw);
+
+/**
+ *  e1000_init_nvm_ops_generic - Initialize NVM function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Setups up the function pointers to no-op functions
+ **/
+void e1000_init_nvm_ops_generic(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	DEBUGFUNC("e1000_init_nvm_ops_generic");
+
+	/* Initialize function pointers */
+	nvm->ops.init_params = e1000_null_ops_generic;
+	nvm->ops.acquire = e1000_null_ops_generic;
+	nvm->ops.read = e1000_null_read_nvm;
+	nvm->ops.release = e1000_null_nvm_generic;
+	nvm->ops.reload = e1000_reload_nvm_generic;
+	nvm->ops.update = e1000_null_ops_generic;
+	nvm->ops.valid_led_default = e1000_null_led_default;
+	nvm->ops.validate = e1000_null_ops_generic;
+	nvm->ops.write = e1000_null_write_nvm;
+}
+
+/**
+ *  e1000_null_nvm_read - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw,
+			u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b,
+			u16 E1000_UNUSEDARG *c)
+{
+	DEBUGFUNC("e1000_null_read_nvm");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_nvm_generic - No-op function, return void
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_null_nvm_generic");
+	return;
+}
+
+/**
+ *  e1000_null_led_default - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw,
+			   u16 E1000_UNUSEDARG *data)
+{
+	DEBUGFUNC("e1000_null_led_default");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_write_nvm - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw,
+			 u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b,
+			 u16 E1000_UNUSEDARG *c)
+{
+	DEBUGFUNC("e1000_null_write_nvm");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_raise_eec_clk - Raise EEPROM clock
+ *  @hw: pointer to the HW structure
+ *  @eecd: pointer to the EEPROM
+ *
+ *  Enable/Raise the EEPROM clock bit.
+ **/
+static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	*eecd = *eecd | E1000_EECD_SK;
+	E1000_WRITE_REG(hw, E1000_EECD, *eecd);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(hw->nvm.delay_usec);
+}
+
+/**
+ *  e1000_lower_eec_clk - Lower EEPROM clock
+ *  @hw: pointer to the HW structure
+ *  @eecd: pointer to the EEPROM
+ *
+ *  Clear/Lower the EEPROM clock bit.
+ **/
+static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	*eecd = *eecd & ~E1000_EECD_SK;
+	E1000_WRITE_REG(hw, E1000_EECD, *eecd);
+	E1000_WRITE_FLUSH(hw);
+	usec_delay(hw->nvm.delay_usec);
+}
+
+/**
+ *  e1000_shift_out_eec_bits - Shift data bits our to the EEPROM
+ *  @hw: pointer to the HW structure
+ *  @data: data to send to the EEPROM
+ *  @count: number of bits to shift out
+ *
+ *  We need to shift 'count' bits out to the EEPROM.  So, the value in the
+ *  "data" parameter will be shifted out to the EEPROM one bit at a time.
+ *  In order to do this, "data" must be broken down into bits.
+ **/
+static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+	u32 mask;
+
+	DEBUGFUNC("e1000_shift_out_eec_bits");
+
+	mask = 0x01 << (count - 1);
+	if (nvm->type == e1000_nvm_eeprom_spi)
+		eecd |= E1000_EECD_DO;
+
+	do {
+		eecd &= ~E1000_EECD_DI;
+
+		if (data & mask)
+			eecd |= E1000_EECD_DI;
+
+		E1000_WRITE_REG(hw, E1000_EECD, eecd);
+		E1000_WRITE_FLUSH(hw);
+
+		usec_delay(nvm->delay_usec);
+
+		e1000_raise_eec_clk(hw, &eecd);
+		e1000_lower_eec_clk(hw, &eecd);
+
+		mask >>= 1;
+	} while (mask);
+
+	eecd &= ~E1000_EECD_DI;
+	E1000_WRITE_REG(hw, E1000_EECD, eecd);
+}
+
+/**
+ *  e1000_shift_in_eec_bits - Shift data bits in from the EEPROM
+ *  @hw: pointer to the HW structure
+ *  @count: number of bits to shift in
+ *
+ *  In order to read a register from the EEPROM, we need to shift 'count' bits
+ *  in from the EEPROM.  Bits are "shifted in" by raising the clock input to
+ *  the EEPROM (setting the SK bit), and then reading the value of the data out
+ *  "DO" bit.  During this "shifting in" process the data in "DI" bit should
+ *  always be clear.
+ **/
+static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count)
+{
+	u32 eecd;
+	u32 i;
+	u16 data;
+
+	DEBUGFUNC("e1000_shift_in_eec_bits");
+
+	eecd = E1000_READ_REG(hw, E1000_EECD);
+
+	eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+	data = 0;
+
+	for (i = 0; i < count; i++) {
+		data <<= 1;
+		e1000_raise_eec_clk(hw, &eecd);
+
+		eecd = E1000_READ_REG(hw, E1000_EECD);
+
+		eecd &= ~E1000_EECD_DI;
+		if (eecd & E1000_EECD_DO)
+			data |= 1;
+
+		e1000_lower_eec_clk(hw, &eecd);
+	}
+
+	return data;
+}
+
+/**
+ *  e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ *  @hw: pointer to the HW structure
+ *  @ee_reg: EEPROM flag for polling
+ *
+ *  Polls the EEPROM status bit for either read or write completion based
+ *  upon the value of 'ee_reg'.
+ **/
+s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg)
+{
+	u32 attempts = 100000;
+	u32 i, reg = 0;
+
+	DEBUGFUNC("e1000_poll_eerd_eewr_done");
+
+	for (i = 0; i < attempts; i++) {
+		if (ee_reg == E1000_NVM_POLL_READ)
+			reg = E1000_READ_REG(hw, E1000_EERD);
+		else
+			reg = E1000_READ_REG(hw, E1000_EEWR);
+
+		if (reg & E1000_NVM_RW_REG_DONE)
+			return E1000_SUCCESS;
+
+		usec_delay(5);
+	}
+
+	return -E1000_ERR_NVM;
+}
+
+/**
+ *  e1000_acquire_nvm_generic - Generic request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+s32 e1000_acquire_nvm_generic(struct e1000_hw *hw)
+{
+	u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+	s32 timeout = E1000_NVM_GRANT_ATTEMPTS;
+
+	DEBUGFUNC("e1000_acquire_nvm_generic");
+
+	E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ);
+	eecd = E1000_READ_REG(hw, E1000_EECD);
+
+	while (timeout) {
+		if (eecd & E1000_EECD_GNT)
+			break;
+		usec_delay(5);
+		eecd = E1000_READ_REG(hw, E1000_EECD);
+		timeout--;
+	}
+
+	if (!timeout) {
+		eecd &= ~E1000_EECD_REQ;
+		E1000_WRITE_REG(hw, E1000_EECD, eecd);
+		DEBUGOUT("Could not acquire NVM grant\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_standby_nvm - Return EEPROM to standby state
+ *  @hw: pointer to the HW structure
+ *
+ *  Return the EEPROM to a standby state.
+ **/
+static void e1000_standby_nvm(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+
+	DEBUGFUNC("e1000_standby_nvm");
+
+	if (nvm->type == e1000_nvm_eeprom_spi) {
+		/* Toggle CS to flush commands */
+		eecd |= E1000_EECD_CS;
+		E1000_WRITE_REG(hw, E1000_EECD, eecd);
+		E1000_WRITE_FLUSH(hw);
+		usec_delay(nvm->delay_usec);
+		eecd &= ~E1000_EECD_CS;
+		E1000_WRITE_REG(hw, E1000_EECD, eecd);
+		E1000_WRITE_FLUSH(hw);
+		usec_delay(nvm->delay_usec);
+	}
+}
+
+/**
+ *  e1000_stop_nvm - Terminate EEPROM command
+ *  @hw: pointer to the HW structure
+ *
+ *  Terminates the current command by inverting the EEPROM's chip select pin.
+ **/
+static void e1000_stop_nvm(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	DEBUGFUNC("e1000_stop_nvm");
+
+	eecd = E1000_READ_REG(hw, E1000_EECD);
+	if (hw->nvm.type == e1000_nvm_eeprom_spi) {
+		/* Pull CS high */
+		eecd |= E1000_EECD_CS;
+		e1000_lower_eec_clk(hw, &eecd);
+	}
+}
+
+/**
+ *  e1000_release_nvm_generic - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ **/
+void e1000_release_nvm_generic(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	DEBUGFUNC("e1000_release_nvm_generic");
+
+	e1000_stop_nvm(hw);
+
+	eecd = E1000_READ_REG(hw, E1000_EECD);
+	eecd &= ~E1000_EECD_REQ;
+	E1000_WRITE_REG(hw, E1000_EECD, eecd);
+}
+
+/**
+ *  e1000_ready_nvm_eeprom - Prepares EEPROM for read/write
+ *  @hw: pointer to the HW structure
+ *
+ *  Setups the EEPROM for reading and writing.
+ **/
+static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = E1000_READ_REG(hw, E1000_EECD);
+	u8 spi_stat_reg;
+
+	DEBUGFUNC("e1000_ready_nvm_eeprom");
+
+	if (nvm->type == e1000_nvm_eeprom_spi) {
+		u16 timeout = NVM_MAX_RETRY_SPI;
+
+		/* Clear SK and CS */
+		eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+		E1000_WRITE_REG(hw, E1000_EECD, eecd);
+		E1000_WRITE_FLUSH(hw);
+		usec_delay(1);
+
+		/* Read "Status Register" repeatedly until the LSB is cleared.
+		 * The EEPROM will signal that the command has been completed
+		 * by clearing bit 0 of the internal status register.  If it's
+		 * not cleared within 'timeout', then error out.
+		 */
+		while (timeout) {
+			e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI,
+						 hw->nvm.opcode_bits);
+			spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8);
+			if (!(spi_stat_reg & NVM_STATUS_RDY_SPI))
+				break;
+
+			usec_delay(5);
+			e1000_standby_nvm(hw);
+			timeout--;
+		}
+
+		if (!timeout) {
+			DEBUGOUT("SPI NVM Status error\n");
+			return -E1000_ERR_NVM;
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_nvm_spi - Read EEPROM's using SPI
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of words to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM.
+ **/
+s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i = 0;
+	s32 ret_val;
+	u16 word_in;
+	u8 read_opcode = NVM_READ_OPCODE_SPI;
+
+	DEBUGFUNC("e1000_read_nvm_spi");
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		DEBUGOUT("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	ret_val = nvm->ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_ready_nvm_eeprom(hw);
+	if (ret_val)
+		goto release;
+
+	e1000_standby_nvm(hw);
+
+	if ((nvm->address_bits == 8) && (offset >= 128))
+		read_opcode |= NVM_A8_OPCODE_SPI;
+
+	/* Send the READ command (opcode + addr) */
+	e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits);
+	e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits);
+
+	/* Read the data.  SPI NVMs increment the address with each byte
+	 * read and will roll over if reading beyond the end.  This allows
+	 * us to read the whole NVM from any offset
+	 */
+	for (i = 0; i < words; i++) {
+		word_in = e1000_shift_in_eec_bits(hw, 16);
+		data[i] = (word_in >> 8) | (word_in << 8);
+	}
+
+release:
+	nvm->ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_nvm_eerd - Reads EEPROM using EERD register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of words to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i, eerd = 0;
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_read_nvm_eerd");
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		DEBUGOUT("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) +
+		       E1000_NVM_RW_REG_START;
+
+		E1000_WRITE_REG(hw, E1000_EERD, eerd);
+		ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ);
+		if (ret_val)
+			break;
+
+		data[i] = (E1000_READ_REG(hw, E1000_EERD) >>
+			   E1000_NVM_RW_REG_DATA);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_nvm_spi - Write to EEPROM using SPI
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  Writes data to EEPROM at offset using SPI interface.
+ *
+ *  If e1000_update_nvm_checksum is not called after this function , the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	s32 ret_val = -E1000_ERR_NVM;
+	u16 widx = 0;
+
+	DEBUGFUNC("e1000_write_nvm_spi");
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		DEBUGOUT("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	while (widx < words) {
+		u8 write_opcode = NVM_WRITE_OPCODE_SPI;
+
+		ret_val = nvm->ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1000_ready_nvm_eeprom(hw);
+		if (ret_val) {
+			nvm->ops.release(hw);
+			return ret_val;
+		}
+
+		e1000_standby_nvm(hw);
+
+		/* Send the WRITE ENABLE command (8 bit opcode) */
+		e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI,
+					 nvm->opcode_bits);
+
+		e1000_standby_nvm(hw);
+
+		/* Some SPI eeproms use the 8th address bit embedded in the
+		 * opcode
+		 */
+		if ((nvm->address_bits == 8) && (offset >= 128))
+			write_opcode |= NVM_A8_OPCODE_SPI;
+
+		/* Send the Write command (8-bit opcode + addr) */
+		e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits);
+		e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2),
+					 nvm->address_bits);
+
+		/* Loop to allow for up to whole page write of eeprom */
+		while (widx < words) {
+			u16 word_out = data[widx];
+			word_out = (word_out >> 8) | (word_out << 8);
+			e1000_shift_out_eec_bits(hw, word_out, 16);
+			widx++;
+
+			if ((((offset + widx) * 2) % nvm->page_size) == 0) {
+				e1000_standby_nvm(hw);
+				break;
+			}
+		}
+		msec_delay(10);
+		nvm->ops.release(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_pba_string_generic - Read device part number
+ *  @hw: pointer to the HW structure
+ *  @pba_num: pointer to device part number
+ *  @pba_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number from the EEPROM and stores
+ *  the value in pba_num.
+ **/
+s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+				  u32 pba_num_size)
+{
+	s32 ret_val;
+	u16 nvm_data;
+	u16 pba_ptr;
+	u16 offset;
+	u16 length;
+
+	DEBUGFUNC("e1000_read_pba_string_generic");
+
+	if (pba_num == NULL) {
+		DEBUGOUT("PBA string buffer was null\n");
+		return -E1000_ERR_INVALID_ARGUMENT;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	/* if nvm_data is not ptr guard the PBA must be in legacy format which
+	 * means pba_ptr is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (nvm_data != NVM_PBA_PTR_GUARD) {
+		DEBUGOUT("NVM PBA number is not stored as string\n");
+
+		/* make sure callers buffer is big enough to store the PBA */
+		if (pba_num_size < E1000_PBANUM_LENGTH) {
+			DEBUGOUT("PBA string buffer too small\n");
+			return E1000_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pba_ptr */
+		pba_num[0] = (nvm_data >> 12) & 0xF;
+		pba_num[1] = (nvm_data >> 8) & 0xF;
+		pba_num[2] = (nvm_data >> 4) & 0xF;
+		pba_num[3] = nvm_data & 0xF;
+		pba_num[4] = (pba_ptr >> 12) & 0xF;
+		pba_num[5] = (pba_ptr >> 8) & 0xF;
+		pba_num[6] = '-';
+		pba_num[7] = 0;
+		pba_num[8] = (pba_ptr >> 4) & 0xF;
+		pba_num[9] = pba_ptr & 0xF;
+
+		/* put a null character on the end of our string */
+		pba_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (pba_num[offset] < 0xA)
+				pba_num[offset] += '0';
+			else if (pba_num[offset] < 0x10)
+				pba_num[offset] += 'A' - 0xA;
+		}
+
+		return E1000_SUCCESS;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		DEBUGOUT("NVM PBA number section invalid length\n");
+		return -E1000_ERR_NVM_PBA_SECTION;
+	}
+	/* check if pba_num buffer is big enough */
+	if (pba_num_size < (((u32)length * 2) - 1)) {
+		DEBUGOUT("PBA string buffer too small\n");
+		return -E1000_ERR_NO_SPACE;
+	}
+
+	/* trim pba length from start of string */
+	pba_ptr++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Read Error\n");
+			return ret_val;
+		}
+		pba_num[offset * 2] = (u8)(nvm_data >> 8);
+		pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
+	}
+	pba_num[offset * 2] = '\0';
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_pba_length_generic - Read device part number length
+ *  @hw: pointer to the HW structure
+ *  @pba_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number length from the EEPROM and
+ *  stores the value in pba_num_size.
+ **/
+s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size)
+{
+	s32 ret_val;
+	u16 nvm_data;
+	u16 pba_ptr;
+	u16 length;
+
+	DEBUGFUNC("e1000_read_pba_length_generic");
+
+	if (pba_num_size == NULL) {
+		DEBUGOUT("PBA buffer size was null\n");
+		return -E1000_ERR_INVALID_ARGUMENT;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	 /* if data is not ptr guard the PBA must be in legacy format */
+	if (nvm_data != NVM_PBA_PTR_GUARD) {
+		*pba_num_size = E1000_PBANUM_LENGTH;
+		return E1000_SUCCESS;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length);
+	if (ret_val) {
+		DEBUGOUT("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		DEBUGOUT("NVM PBA number section invalid length\n");
+		return -E1000_ERR_NVM_PBA_SECTION;
+	}
+
+	/* Convert from length in u16 values to u8 chars, add 1 for NULL,
+	 * and subtract 2 because length field is included in length.
+	 */
+	*pba_num_size = ((u32)length * 2) - 1;
+
+	return E1000_SUCCESS;
+}
+
+
+
+
+
+/**
+ *  e1000_read_mac_addr_generic - Read device MAC address
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the device MAC address from the EEPROM and stores the value.
+ *  Since devices with two ports use the same EEPROM, we increment the
+ *  last bit in the MAC address for the second port.
+ **/
+s32 e1000_read_mac_addr_generic(struct e1000_hw *hw)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = E1000_READ_REG(hw, E1000_RAH(0));
+	rar_low = E1000_READ_REG(hw, E1000_RAL(0));
+
+	for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8));
+
+	for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8));
+
+	for (i = 0; i < ETH_ADDR_LEN; i++)
+		hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_generic - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	DEBUGFUNC("e1000_validate_nvm_checksum_generic");
+
+	for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Read Error\n");
+			return ret_val;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16) NVM_SUM) {
+		DEBUGOUT("NVM Checksum Invalid\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_update_nvm_checksum_generic - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM.
+ **/
+s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	DEBUGFUNC("e1000_update_nvm_checksum");
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			DEBUGOUT("NVM Read Error while updating checksum.\n");
+			return ret_val;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16) NVM_SUM - checksum;
+	ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
+	if (ret_val)
+		DEBUGOUT("NVM Write Error while updating checksum.\n");
+
+	return ret_val;
+}
+
+/**
+ *  e1000_reload_nvm_generic - Reloads EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
+ *  extended control register.
+ **/
+static void e1000_reload_nvm_generic(struct e1000_hw *hw)
+{
+	u32 ctrl_ext;
+
+	DEBUGFUNC("e1000_reload_nvm_generic");
+
+	usec_delay(10);
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ *  e1000_get_fw_version - Get firmware version information
+ *  @hw: pointer to the HW structure
+ *  @fw_vers: pointer to output version structure
+ *
+ *  unsupported/not present features return 0 in version structure
+ **/
+void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
+{
+	u16 eeprom_verh, eeprom_verl, etrack_test, fw_version;
+	u8 q, hval, rem, result;
+	u16 comb_verh, comb_verl, comb_offset;
+
+	memset(fw_vers, 0, sizeof(struct e1000_fw_version));
+
+	/* basic eeprom version numbers, bits used vary by part and by tool
+	 * used to create the nvm images */
+	/* Check which data format we have */
+	hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test);
+	switch (hw->mac.type) {
+	case e1000_i211:
+		e1000_read_invm_version(hw, fw_vers);
+		return;
+	case e1000_82575:
+	case e1000_82576:
+	case e1000_82580:
+		/* Use this format, unless EETRACK ID exists,
+		 * then use alternate format
+		 */
+		if ((etrack_test &  NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
+			hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+			fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+					      >> NVM_MAJOR_SHIFT;
+			fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
+					      >> NVM_MINOR_SHIFT;
+			fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
+			goto etrack_id;
+		}
+		break;
+	case e1000_i210:
+		if (!(e1000_get_flash_presence_i210(hw))) {
+			e1000_read_invm_version(hw, fw_vers);
+			return;
+		}
+		/* fall through */
+	case e1000_i350:
+	case e1000_i354:
+		/* find combo image version */
+		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
+		if ((comb_offset != 0x0) &&
+		    (comb_offset != NVM_VER_INVALID)) {
+
+			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
+					 + 1), 1, &comb_verh);
+			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
+					 1, &comb_verl);
+
+			/* get Option Rom version if it exists and is valid */
+			if ((comb_verh && comb_verl) &&
+			    ((comb_verh != NVM_VER_INVALID) &&
+			     (comb_verl != NVM_VER_INVALID))) {
+
+				fw_vers->or_valid = true;
+				fw_vers->or_major =
+					comb_verl >> NVM_COMB_VER_SHFT;
+				fw_vers->or_build =
+					(comb_verl << NVM_COMB_VER_SHFT)
+					| (comb_verh >> NVM_COMB_VER_SHFT);
+				fw_vers->or_patch =
+					comb_verh & NVM_COMB_VER_MASK;
+			}
+		}
+		break;
+	default:
+		return;
+	}
+	hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+	fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+			      >> NVM_MAJOR_SHIFT;
+
+	/* check for old style version format in newer images*/
+	if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
+		eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
+	} else {
+		eeprom_verl = (fw_version & NVM_MINOR_MASK)
+				>> NVM_MINOR_SHIFT;
+	}
+	/* Convert minor value to hex before assigning to output struct
+	 * Val to be converted will not be higher than 99, per tool output
+	 */
+	q = eeprom_verl / NVM_HEX_CONV;
+	hval = q * NVM_HEX_TENS;
+	rem = eeprom_verl % NVM_HEX_CONV;
+	result = hval + rem;
+	fw_vers->eep_minor = result;
+
+etrack_id:
+	if ((etrack_test &  NVM_MAJOR_MASK) == NVM_ETRACK_VALID) {
+		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl);
+		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh);
+		fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT)
+			| eeprom_verl;
+	}
+	return;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
new file mode 100644
index 00000000..fe62785a
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
@@ -0,0 +1,75 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_NVM_H_
+#define _E1000_NVM_H_
+
+
+struct e1000_fw_version {
+	u32 etrack_id;
+	u16 eep_major;
+	u16 eep_minor;
+	u16 eep_build;
+
+	u8 invm_major;
+	u8 invm_minor;
+	u8 invm_img_type;
+
+	bool or_valid;
+	u16 or_major;
+	u16 or_build;
+	u16 or_patch;
+};
+
+
+void e1000_init_nvm_ops_generic(struct e1000_hw *hw);
+s32  e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c);
+void e1000_null_nvm_generic(struct e1000_hw *hw);
+s32  e1000_null_led_default(struct e1000_hw *hw, u16 *data);
+s32  e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c);
+s32  e1000_acquire_nvm_generic(struct e1000_hw *hw);
+
+s32  e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg);
+s32  e1000_read_mac_addr_generic(struct e1000_hw *hw);
+s32  e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+				   u32 pba_num_size);
+s32  e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size);
+s32  e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32  e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words,
+			 u16 *data);
+s32  e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data);
+s32  e1000_validate_nvm_checksum_generic(struct e1000_hw *hw);
+s32  e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words,
+			 u16 *data);
+s32  e1000_update_nvm_checksum_generic(struct e1000_hw *hw);
+void e1000_release_nvm_generic(struct e1000_hw *hw);
+void e1000_get_fw_version(struct e1000_hw *hw,
+			  struct e1000_fw_version *fw_vers);
+
+#define E1000_STM_OPCODE	0xDB00
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
new file mode 100644
index 00000000..d1cf98e2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
@@ -0,0 +1,136 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* glue for the OS independent part of e1000
+ * includes register access macros
+ */
+
+#ifndef _E1000_OSDEP_H_
+#define _E1000_OSDEP_H_
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/sched.h>
+#include "kcompat.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#define usec_delay(x) udelay(x)
+#define usec_delay_irq(x) udelay(x)
+#ifndef msec_delay
+#define msec_delay(x) do { \
+	/* Don't mdelay in interrupt context! */ \
+	if (in_interrupt()) \
+		BUG(); \
+	else \
+		msleep(x); \
+} while (0)
+
+/* Some workarounds require millisecond delays and are run during interrupt
+ * context.  Most notably, when establishing link, the phy may need tweaking
+ * but cannot process phy register reads/writes faster than millisecond
+ * intervals...and we establish link due to a "link status change" interrupt.
+ */
+#define msec_delay_irq(x) mdelay(x)
+#endif
+
+#define PCI_COMMAND_REGISTER   PCI_COMMAND
+#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE
+#define ETH_ADDR_LEN           ETH_ALEN
+
+#ifdef __BIG_ENDIAN
+#define E1000_BIG_ENDIAN __BIG_ENDIAN
+#endif
+
+
+#ifdef DEBUG
+#define DEBUGOUT(S) printk(KERN_DEBUG S)
+#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S, ## A)
+#else
+#define DEBUGOUT(S)
+#define DEBUGOUT1(S, A...)
+#endif
+
+#ifdef DEBUG_FUNC
+#define DEBUGFUNC(F) DEBUGOUT(F "\n")
+#else
+#define DEBUGFUNC(F)
+#endif
+#define DEBUGOUT2 DEBUGOUT1
+#define DEBUGOUT3 DEBUGOUT2
+#define DEBUGOUT7 DEBUGOUT3
+
+#define E1000_REGISTER(a, reg) reg
+
+#define E1000_WRITE_REG(a, reg, value) ( \
+    writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg))))
+
+#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg)))
+
+#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+    writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))))
+
+#define E1000_READ_REG_ARRAY(a, reg, offset) ( \
+    readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))
+
+#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY
+#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY
+
+#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \
+    writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))))
+
+#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \
+    readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))
+
+#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \
+    writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))))
+
+#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \
+    readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))
+
+#define E1000_WRITE_REG_IO(a, reg, offset) do { \
+    outl(reg, ((a)->io_base));                  \
+    outl(offset, ((a)->io_base + 4));      } while (0)
+
+#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS)
+
+#define E1000_WRITE_FLASH_REG(a, reg, value) ( \
+    writel((value), ((a)->flash_address + reg)))
+
+#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \
+    writew((value), ((a)->flash_address + reg)))
+
+#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg))
+
+#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg))
+
+#endif /* _E1000_OSDEP_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
new file mode 100644
index 00000000..df224702
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
@@ -0,0 +1,3405 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "e1000_api.h"
+
+static s32 e1000_wait_autoneg(struct e1000_hw *hw);
+/* Cable length tables */
+static const u16 e1000_m88_cable_length_table[] = {
+	0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED };
+#define M88E1000_CABLE_LENGTH_TABLE_SIZE \
+		(sizeof(e1000_m88_cable_length_table) / \
+		 sizeof(e1000_m88_cable_length_table[0]))
+
+static const u16 e1000_igp_2_cable_length_table[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3,
+	6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22,
+	26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40,
+	44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61,
+	66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82,
+	87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95,
+	100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121,
+	124};
+#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \
+		(sizeof(e1000_igp_2_cable_length_table) / \
+		 sizeof(e1000_igp_2_cable_length_table[0]))
+
+/**
+ *  e1000_init_phy_ops_generic - Initialize PHY function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Setups up the function pointers to no-op functions
+ **/
+void e1000_init_phy_ops_generic(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	DEBUGFUNC("e1000_init_phy_ops_generic");
+
+	/* Initialize function pointers */
+	phy->ops.init_params = e1000_null_ops_generic;
+	phy->ops.acquire = e1000_null_ops_generic;
+	phy->ops.check_polarity = e1000_null_ops_generic;
+	phy->ops.check_reset_block = e1000_null_ops_generic;
+	phy->ops.commit = e1000_null_ops_generic;
+	phy->ops.force_speed_duplex = e1000_null_ops_generic;
+	phy->ops.get_cfg_done = e1000_null_ops_generic;
+	phy->ops.get_cable_length = e1000_null_ops_generic;
+	phy->ops.get_info = e1000_null_ops_generic;
+	phy->ops.set_page = e1000_null_set_page;
+	phy->ops.read_reg = e1000_null_read_reg;
+	phy->ops.read_reg_locked = e1000_null_read_reg;
+	phy->ops.read_reg_page = e1000_null_read_reg;
+	phy->ops.release = e1000_null_phy_generic;
+	phy->ops.reset = e1000_null_ops_generic;
+	phy->ops.set_d0_lplu_state = e1000_null_lplu_state;
+	phy->ops.set_d3_lplu_state = e1000_null_lplu_state;
+	phy->ops.write_reg = e1000_null_write_reg;
+	phy->ops.write_reg_locked = e1000_null_write_reg;
+	phy->ops.write_reg_page = e1000_null_write_reg;
+	phy->ops.power_up = e1000_null_phy_generic;
+	phy->ops.power_down = e1000_null_phy_generic;
+	phy->ops.read_i2c_byte = e1000_read_i2c_byte_null;
+	phy->ops.write_i2c_byte = e1000_write_i2c_byte_null;
+}
+
+/**
+ *  e1000_null_set_page - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw,
+			u16 E1000_UNUSEDARG data)
+{
+	DEBUGFUNC("e1000_null_set_page");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_read_reg - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw,
+			u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data)
+{
+	DEBUGFUNC("e1000_null_read_reg");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_phy_generic - No-op function, return void
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_null_phy_generic");
+	return;
+}
+
+/**
+ *  e1000_null_lplu_state - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw,
+			  bool E1000_UNUSEDARG active)
+{
+	DEBUGFUNC("e1000_null_lplu_state");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_null_write_reg - No-op function, return 0
+ *  @hw: pointer to the HW structure
+ **/
+s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw,
+			 u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data)
+{
+	DEBUGFUNC("e1000_null_write_reg");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_i2c_byte_null - No-op function, return 0
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: data value read
+ *
+ **/
+s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw,
+			     u8 E1000_UNUSEDARG byte_offset,
+			     u8 E1000_UNUSEDARG dev_addr,
+			     u8 E1000_UNUSEDARG *data)
+{
+	DEBUGFUNC("e1000_read_i2c_byte_null");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_i2c_byte_null - No-op function, return 0
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: data value to write
+ *
+ **/
+s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw,
+			      u8 E1000_UNUSEDARG byte_offset,
+			      u8 E1000_UNUSEDARG dev_addr,
+			      u8 E1000_UNUSEDARG data)
+{
+	DEBUGFUNC("e1000_write_i2c_byte_null");
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_check_reset_block_generic - Check if PHY reset is blocked
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the PHY management control register and check whether a PHY reset
+ *  is blocked.  If a reset is not blocked return E1000_SUCCESS, otherwise
+ *  return E1000_BLK_PHY_RESET (12).
+ **/
+s32 e1000_check_reset_block_generic(struct e1000_hw *hw)
+{
+	u32 manc;
+
+	DEBUGFUNC("e1000_check_reset_block");
+
+	manc = E1000_READ_REG(hw, E1000_MANC);
+
+	return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ?
+	       E1000_BLK_PHY_RESET : E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_phy_id - Retrieve the PHY ID and revision
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY registers and stores the PHY ID and possibly the PHY
+ *  revision in the hardware structure.
+ **/
+s32 e1000_get_phy_id(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = E1000_SUCCESS;
+	u16 phy_id;
+
+	DEBUGFUNC("e1000_get_phy_id");
+
+	if (!phy->ops.read_reg)
+		return E1000_SUCCESS;
+
+	ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
+	if (ret_val)
+		return ret_val;
+
+	phy->id = (u32)(phy_id << 16);
+	usec_delay(20);
+	ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
+	if (ret_val)
+		return ret_val;
+
+	phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+	phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_phy_reset_dsp_generic - Reset PHY DSP
+ *  @hw: pointer to the HW structure
+ *
+ *  Reset the digital signal processor.
+ **/
+s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_phy_reset_dsp_generic");
+
+	if (!hw->phy.ops.write_reg)
+		return E1000_SUCCESS;
+
+	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
+	if (ret_val)
+		return ret_val;
+
+	return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0);
+}
+
+/**
+ *  e1000_read_phy_reg_mdic - Read MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the MDI control register in the PHY at offset and stores the
+ *  information read to data.
+ **/
+s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+
+	DEBUGFUNC("e1000_read_phy_reg_mdic");
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		DEBUGOUT1("PHY Address %d is out of range\n", offset);
+		return -E1000_ERR_PARAM;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+		(phy->addr << E1000_MDIC_PHY_SHIFT) |
+		(E1000_MDIC_OP_READ));
+
+	E1000_WRITE_REG(hw, E1000_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+		usec_delay_irq(50);
+		mdic = E1000_READ_REG(hw, E1000_MDIC);
+		if (mdic & E1000_MDIC_READY)
+			break;
+	}
+	if (!(mdic & E1000_MDIC_READY)) {
+		DEBUGOUT("MDI Read did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (mdic & E1000_MDIC_ERROR) {
+		DEBUGOUT("MDI Error\n");
+		return -E1000_ERR_PHY;
+	}
+	if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+		DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n",
+			  offset,
+			  (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+		return -E1000_ERR_PHY;
+	}
+	*data = (u16) mdic;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_phy_reg_mdic - Write MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write to register at offset
+ *
+ *  Writes data to MDI control register in the PHY at offset.
+ **/
+s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+
+	DEBUGFUNC("e1000_write_phy_reg_mdic");
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		DEBUGOUT1("PHY Address %d is out of range\n", offset);
+		return -E1000_ERR_PARAM;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = (((u32)data) |
+		(offset << E1000_MDIC_REG_SHIFT) |
+		(phy->addr << E1000_MDIC_PHY_SHIFT) |
+		(E1000_MDIC_OP_WRITE));
+
+	E1000_WRITE_REG(hw, E1000_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+		usec_delay_irq(50);
+		mdic = E1000_READ_REG(hw, E1000_MDIC);
+		if (mdic & E1000_MDIC_READY)
+			break;
+	}
+	if (!(mdic & E1000_MDIC_READY)) {
+		DEBUGOUT("MDI Write did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (mdic & E1000_MDIC_ERROR) {
+		DEBUGOUT("MDI Error\n");
+		return -E1000_ERR_PHY;
+	}
+	if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+		DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n",
+			  offset,
+			  (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+		return -E1000_ERR_PHY;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_phy_reg_i2c - Read PHY register using i2c
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset using the i2c interface and stores the
+ *  retrieved information in data.
+ **/
+s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, i2ccmd = 0;
+
+	DEBUGFUNC("e1000_read_phy_reg_i2c");
+
+	/* Set up Op-code, Phy Address, and register address in the I2CCMD
+	 * register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
+		  (E1000_I2CCMD_OPCODE_READ));
+
+	E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+
+	/* Poll the ready bit to see if the I2C read completed */
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		usec_delay(50);
+		i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
+		if (i2ccmd & E1000_I2CCMD_READY)
+			break;
+	}
+	if (!(i2ccmd & E1000_I2CCMD_READY)) {
+		DEBUGOUT("I2CCMD Read did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (i2ccmd & E1000_I2CCMD_ERROR) {
+		DEBUGOUT("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+
+	/* Need to byte-swap the 16-bit value. */
+	*data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_phy_reg_i2c - Write PHY register using i2c
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset using the i2c interface.
+ **/
+s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, i2ccmd = 0;
+	u16 phy_data_swapped;
+
+	DEBUGFUNC("e1000_write_phy_reg_i2c");
+
+	/* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/
+	if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) {
+		DEBUGOUT1("PHY I2C Address %d is out of range.\n",
+			  hw->phy.addr);
+		return -E1000_ERR_CONFIG;
+	}
+
+	/* Swap the data bytes for the I2C interface */
+	phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00);
+
+	/* Set up Op-code, Phy Address, and register address in the I2CCMD
+	 * register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
+		  E1000_I2CCMD_OPCODE_WRITE |
+		  phy_data_swapped);
+
+	E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+
+	/* Poll the ready bit to see if the I2C read completed */
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		usec_delay(50);
+		i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
+		if (i2ccmd & E1000_I2CCMD_READY)
+			break;
+	}
+	if (!(i2ccmd & E1000_I2CCMD_READY)) {
+		DEBUGOUT("I2CCMD Write did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (i2ccmd & E1000_I2CCMD_ERROR) {
+		DEBUGOUT("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_sfp_data_byte - Reads SFP module data.
+ *  @hw: pointer to the HW structure
+ *  @offset: byte location offset to be read
+ *  @data: read data buffer pointer
+ *
+ *  Reads one byte from SFP module data stored
+ *  in SFP resided EEPROM memory or SFP diagnostic area.
+ *  Function should be called with
+ *  E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
+ *  E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
+ *  access
+ **/
+s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data)
+{
+	u32 i = 0;
+	u32 i2ccmd = 0;
+	u32 data_local = 0;
+
+	DEBUGFUNC("e1000_read_sfp_data_byte");
+
+	if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
+		DEBUGOUT("I2CCMD command address exceeds upper limit\n");
+		return -E1000_ERR_PHY;
+	}
+
+	/* Set up Op-code, EEPROM Address,in the I2CCMD
+	 * register. The MAC will take care of interfacing with the
+	 * EEPROM to retrieve the desired data.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  E1000_I2CCMD_OPCODE_READ);
+
+	E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+
+	/* Poll the ready bit to see if the I2C read completed */
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		usec_delay(50);
+		data_local = E1000_READ_REG(hw, E1000_I2CCMD);
+		if (data_local & E1000_I2CCMD_READY)
+			break;
+	}
+	if (!(data_local & E1000_I2CCMD_READY)) {
+		DEBUGOUT("I2CCMD Read did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (data_local & E1000_I2CCMD_ERROR) {
+		DEBUGOUT("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+	*data = (u8) data_local & 0xFF;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_sfp_data_byte - Writes SFP module data.
+ *  @hw: pointer to the HW structure
+ *  @offset: byte location offset to write to
+ *  @data: data to write
+ *
+ *  Writes one byte to SFP module data stored
+ *  in SFP resided EEPROM memory or SFP diagnostic area.
+ *  Function should be called with
+ *  E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
+ *  E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
+ *  access
+ **/
+s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data)
+{
+	u32 i = 0;
+	u32 i2ccmd = 0;
+	u32 data_local = 0;
+
+	DEBUGFUNC("e1000_write_sfp_data_byte");
+
+	if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
+		DEBUGOUT("I2CCMD command address exceeds upper limit\n");
+		return -E1000_ERR_PHY;
+	}
+	/* The programming interface is 16 bits wide
+	 * so we need to read the whole word first
+	 * then update appropriate byte lane and write
+	 * the updated word back.
+	 */
+	/* Set up Op-code, EEPROM Address,in the I2CCMD
+	 * register. The MAC will take care of interfacing
+	 * with an EEPROM to write the data given.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  E1000_I2CCMD_OPCODE_READ);
+	/* Set a command to read single word */
+	E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		usec_delay(50);
+		/* Poll the ready bit to see if lastly
+		 * launched I2C operation completed
+		 */
+		i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD);
+		if (i2ccmd & E1000_I2CCMD_READY) {
+			/* Check if this is READ or WRITE phase */
+			if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) ==
+			    E1000_I2CCMD_OPCODE_READ) {
+				/* Write the selected byte
+				 * lane and update whole word
+				 */
+				data_local = i2ccmd & 0xFF00;
+				data_local |= data;
+				i2ccmd = ((offset <<
+					E1000_I2CCMD_REG_ADDR_SHIFT) |
+					E1000_I2CCMD_OPCODE_WRITE | data_local);
+				E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd);
+			} else {
+				break;
+			}
+		}
+	}
+	if (!(i2ccmd & E1000_I2CCMD_READY)) {
+		DEBUGOUT("I2CCMD Write did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (i2ccmd & E1000_I2CCMD_ERROR) {
+		DEBUGOUT("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_phy_reg_m88 - Read m88 PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and storing the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_read_phy_reg_m88");
+
+	if (!hw->phy.ops.acquire)
+		return E1000_SUCCESS;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					  data);
+
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_phy_reg_m88 - Write m88 PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_write_phy_reg_m88");
+
+	if (!hw->phy.ops.acquire)
+		return E1000_SUCCESS;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					   data);
+
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_set_page_igp - Set page as on IGP-like PHY(s)
+ *  @hw: pointer to the HW structure
+ *  @page: page to set (shifted left when necessary)
+ *
+ *  Sets PHY page required for PHY register access.  Assumes semaphore is
+ *  already acquired.  Note, this function sets phy.addr to 1 so the caller
+ *  must set it appropriately (if necessary) after this function returns.
+ **/
+s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page)
+{
+	DEBUGFUNC("e1000_set_page_igp");
+
+	DEBUGOUT1("Setting page 0x%x\n", page);
+
+	hw->phy.addr = 1;
+
+	return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page);
+}
+
+/**
+ *  __e1000_read_phy_reg_igp - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and stores the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data,
+				    bool locked)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("__e1000_read_phy_reg_igp");
+
+	if (!locked) {
+		if (!hw->phy.ops.acquire)
+			return E1000_SUCCESS;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG)
+		ret_val = e1000_write_phy_reg_mdic(hw,
+						   IGP01E1000_PHY_PAGE_SELECT,
+						   (u16)offset);
+	if (!ret_val)
+		ret_val = e1000_read_phy_reg_mdic(hw,
+						  MAX_PHY_REG_ADDRESS & offset,
+						  data);
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_phy_reg_igp - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore then reads the PHY register at offset and stores the
+ *  retrieved information in data.
+ *  Release the acquired semaphore before exiting.
+ **/
+s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_phy_reg_igp(hw, offset, data, false);
+}
+
+/**
+ *  e1000_read_phy_reg_igp_locked - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset and stores the retrieved information
+ *  in data.  Assumes semaphore already acquired.
+ **/
+s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_phy_reg_igp(hw, offset, data, true);
+}
+
+/**
+ *  e1000_write_phy_reg_igp - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data,
+				     bool locked)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	DEBUGFUNC("e1000_write_phy_reg_igp");
+
+	if (!locked) {
+		if (!hw->phy.ops.acquire)
+			return E1000_SUCCESS;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG)
+		ret_val = e1000_write_phy_reg_mdic(hw,
+						   IGP01E1000_PHY_PAGE_SELECT,
+						   (u16)offset);
+	if (!ret_val)
+		ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS &
+						       offset,
+						   data);
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_phy_reg_igp - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_phy_reg_igp(hw, offset, data, false);
+}
+
+/**
+ *  e1000_write_phy_reg_igp_locked - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset.
+ *  Assumes semaphore already acquired.
+ **/
+s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_phy_reg_igp(hw, offset, data, true);
+}
+
+/**
+ *  __e1000_read_kmrn_reg - Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary.  Then reads the PHY register at offset
+ *  using the kumeran interface.  The information retrieved is stored in data.
+ *  Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data,
+				 bool locked)
+{
+	u32 kmrnctrlsta;
+
+	DEBUGFUNC("__e1000_read_kmrn_reg");
+
+	if (!locked) {
+		s32 ret_val = E1000_SUCCESS;
+
+		if (!hw->phy.ops.acquire)
+			return E1000_SUCCESS;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+		       E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
+	E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta);
+	E1000_WRITE_FLUSH(hw);
+
+	usec_delay(2);
+
+	kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA);
+	*data = (u16)kmrnctrlsta;
+
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_read_kmrn_reg_generic -  Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore then reads the PHY register at offset using the
+ *  kumeran interface.  The information retrieved is stored in data.
+ *  Release the acquired semaphore before exiting.
+ **/
+s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_kmrn_reg(hw, offset, data, false);
+}
+
+/**
+ *  e1000_read_kmrn_reg_locked -  Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset using the kumeran interface.  The
+ *  information retrieved is stored in data.
+ *  Assumes semaphore already acquired.
+ **/
+s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_kmrn_reg(hw, offset, data, true);
+}
+
+/**
+ *  __e1000_write_kmrn_reg - Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary.  Then write the data to PHY register
+ *  at the offset using the kumeran interface.  Release any acquired semaphores
+ *  before exiting.
+ **/
+static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data,
+				  bool locked)
+{
+	u32 kmrnctrlsta;
+
+	DEBUGFUNC("e1000_write_kmrn_reg_generic");
+
+	if (!locked) {
+		s32 ret_val = E1000_SUCCESS;
+
+		if (!hw->phy.ops.acquire)
+			return E1000_SUCCESS;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+		       E1000_KMRNCTRLSTA_OFFSET) | data;
+	E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta);
+	E1000_WRITE_FLUSH(hw);
+
+	usec_delay(2);
+
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_kmrn_reg_generic -  Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore then writes the data to the PHY register at the offset
+ *  using the kumeran interface.  Release the acquired semaphore before exiting.
+ **/
+s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_kmrn_reg(hw, offset, data, false);
+}
+
+/**
+ *  e1000_write_kmrn_reg_locked -  Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Write the data to PHY register at the offset using the kumeran interface.
+ *  Assumes semaphore already acquired.
+ **/
+s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_kmrn_reg(hw, offset, data, true);
+}
+
+/**
+ *  e1000_set_master_slave_mode - Setup PHY for Master/slave mode
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up Master/slave mode
+ **/
+static s32 e1000_set_master_slave_mode(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	/* Resolve Master/Slave mode */
+	ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* load defaults for future use */
+	hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ?
+				   ((phy_data & CR_1000T_MS_VALUE) ?
+				    e1000_ms_force_master :
+				    e1000_ms_force_slave) : e1000_ms_auto;
+
+	switch (hw->phy.ms_type) {
+	case e1000_ms_force_master:
+		phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
+		break;
+	case e1000_ms_force_slave:
+		phy_data |= CR_1000T_MS_ENABLE;
+		phy_data &= ~(CR_1000T_MS_VALUE);
+		break;
+	case e1000_ms_auto:
+		phy_data &= ~CR_1000T_MS_ENABLE;
+		/* fall-through */
+	default:
+		break;
+	}
+
+	return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data);
+}
+
+/**
+ *  e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up Carrier-sense on Transmit and downshift values.
+ **/
+s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	DEBUGFUNC("e1000_copper_link_setup_82577");
+
+	if (hw->phy.reset_disable)
+		return E1000_SUCCESS;
+
+	if (hw->phy.type == e1000_phy_82580) {
+		ret_val = hw->phy.ops.reset(hw);
+		if (ret_val) {
+			DEBUGOUT("Error resetting the PHY.\n");
+			return ret_val;
+		}
+	}
+
+	/* Enable CRS on Tx. This must be set for half-duplex operation. */
+	ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= I82577_CFG_ASSERT_CRS_ON_TX;
+
+	/* Enable downshift */
+	phy_data |= I82577_CFG_ENABLE_DOWNSHIFT;
+
+	ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Set MDI/MDIX mode */
+	ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data);
+	if (ret_val)
+		return ret_val;
+	phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK;
+	/* Options:
+	 *   0 - Auto (default)
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 */
+	switch (hw->phy.mdix) {
+	case 1:
+		break;
+	case 2:
+		phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX;
+		break;
+	case 0:
+	default:
+		phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX;
+		break;
+	}
+	ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	return e1000_set_master_slave_mode(hw);
+}
+
+/**
+ *  e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up MDI/MDI-X and polarity for m88 PHY's.  If necessary, transmit clock
+ *  and downshift values are set also.
+ **/
+s32 e1000_copper_link_setup_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+
+	DEBUGFUNC("e1000_copper_link_setup_m88");
+
+	if (phy->reset_disable)
+		return E1000_SUCCESS;
+
+	/* Enable CRS on Tx. This must be set for half-duplex operation. */
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+	switch (phy->mdix) {
+	case 1:
+		phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+		break;
+	case 2:
+		phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+		break;
+	case 3:
+		phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+		break;
+	case 0:
+	default:
+		phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+	if (phy->disable_polarity_correction)
+		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+	ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->revision < E1000_REVISION_4) {
+		/* Force TX_CLK in the Extended PHY Specific Control Register
+		 * to 25MHz clock.
+		 */
+		ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+					    &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+		if ((phy->revision == E1000_REVISION_2) &&
+		    (phy->id == M88E1111_I_PHY_ID)) {
+			/* 82573L PHY - set the downshift counter to 5x. */
+			phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK;
+			phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X;
+		} else {
+			/* Configure Master and Slave downshift values */
+			phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+				     M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+			phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+				     M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+		}
+		ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+					     phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Commit the changes. */
+	ret_val = phy->ops.commit(hw);
+	if (ret_val) {
+		DEBUGOUT("Error committing the PHY changes\n");
+		return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's.
+ *  Also enables and sets the downshift parameters.
+ **/
+s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+
+	DEBUGFUNC("e1000_copper_link_setup_m88_gen2");
+
+	if (phy->reset_disable)
+		return E1000_SUCCESS;
+
+	/* Enable CRS on Tx. This must be set for half-duplex operation. */
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+	switch (phy->mdix) {
+	case 1:
+		phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+		break;
+	case 2:
+		phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+		break;
+	case 3:
+		/* M88E1112 does not support this mode) */
+		if (phy->id != M88E1112_E_PHY_ID) {
+			phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+			break;
+		}
+	case 0:
+	default:
+		phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+	if (phy->disable_polarity_correction)
+		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+	/* Enable downshift and setting it to X6 */
+	if (phy->id == M88E1543_E_PHY_ID) {
+		phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE;
+		ret_val =
+		    phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.commit(hw);
+		if (ret_val) {
+			DEBUGOUT("Error committing the PHY changes\n");
+			return ret_val;
+		}
+	}
+
+	phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK;
+	phy_data |= I347AT4_PSCR_DOWNSHIFT_6X;
+	phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE;
+
+	ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Commit the changes. */
+	ret_val = phy->ops.commit(hw);
+	if (ret_val) {
+		DEBUGOUT("Error committing the PHY changes\n");
+		return ret_val;
+	}
+
+	ret_val = e1000_set_master_slave_mode(hw);
+	if (ret_val)
+		return ret_val;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_copper_link_setup_igp - Setup igp PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for
+ *  igp PHY's.
+ **/
+s32 e1000_copper_link_setup_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	DEBUGFUNC("e1000_copper_link_setup_igp");
+
+	if (phy->reset_disable)
+		return E1000_SUCCESS;
+
+	ret_val = hw->phy.ops.reset(hw);
+	if (ret_val) {
+		DEBUGOUT("Error resetting the PHY.\n");
+		return ret_val;
+	}
+
+	/* Wait 100ms for MAC to configure PHY from NVM settings, to avoid
+	 * timeout issues when LFS is enabled.
+	 */
+	msec_delay(100);
+
+	/* disable lplu d0 during driver init */
+	if (hw->phy.ops.set_d0_lplu_state) {
+		ret_val = hw->phy.ops.set_d0_lplu_state(hw, false);
+		if (ret_val) {
+			DEBUGOUT("Error Disabling LPLU D0\n");
+			return ret_val;
+		}
+	}
+	/* Configure mdi-mdix settings */
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+
+	switch (phy->mdix) {
+	case 1:
+		data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+		break;
+	case 2:
+		data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
+		break;
+	case 0:
+	default:
+		data |= IGP01E1000_PSCR_AUTO_MDIX;
+		break;
+	}
+	ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data);
+	if (ret_val)
+		return ret_val;
+
+	/* set auto-master slave resolution settings */
+	if (hw->mac.autoneg) {
+		/* when autonegotiation advertisement is only 1000Mbps then we
+		 * should disable SmartSpeed and enable Auto MasterSlave
+		 * resolution as hardware default.
+		 */
+		if (phy->autoneg_advertised == ADVERTISE_1000_FULL) {
+			/* Disable SmartSpeed */
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				return ret_val;
+
+			/* Set auto Master/Slave resolution process */
+			ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~CR_1000T_MS_ENABLE;
+			ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data);
+			if (ret_val)
+				return ret_val;
+		}
+
+		ret_val = e1000_set_master_slave_mode(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_setup_autoneg - Configure PHY for auto-negotiation
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the MII auto-neg advertisement register and/or the 1000T control
+ *  register and if the PHY is already setup for auto-negotiation, then
+ *  return successful.  Otherwise, setup advertisement and flow control to
+ *  the appropriate values for the wanted auto-negotiation.
+ **/
+static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 mii_autoneg_adv_reg;
+	u16 mii_1000t_ctrl_reg = 0;
+
+	DEBUGFUNC("e1000_phy_setup_autoneg");
+
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* Read the MII Auto-Neg Advertisement Register (Address 4). */
+	ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+		/* Read the MII 1000Base-T Control Register (Address 9). */
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
+					    &mii_1000t_ctrl_reg);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Need to parse both autoneg_advertised and fc and set up
+	 * the appropriate PHY registers.  First we will parse for
+	 * autoneg_advertised software override.  Since we can advertise
+	 * a plethora of combinations, we need to check each bit
+	 * individually.
+	 */
+
+	/* First we clear all the 10/100 mb speed bits in the Auto-Neg
+	 * Advertisement Register (Address 4) and the 1000 mb speed bits in
+	 * the  1000Base-T Control Register (Address 9).
+	 */
+	mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
+				 NWAY_AR_100TX_HD_CAPS |
+				 NWAY_AR_10T_FD_CAPS   |
+				 NWAY_AR_10T_HD_CAPS);
+	mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
+
+	DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+	/* Do we want to advertise 10 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+		DEBUGOUT("Advertise 10mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+	}
+
+	/* Do we want to advertise 10 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+		DEBUGOUT("Advertise 10mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+		DEBUGOUT("Advertise 100mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+		DEBUGOUT("Advertise 100mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+		DEBUGOUT("Advertise 1000mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 1000 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+		DEBUGOUT("Advertise 1000mb Full duplex\n");
+		mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+	}
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the PHY advertisement registers accordingly.  If
+	 * auto-negotiation is enabled, then software will have to set the
+	 * "PAUSE" bits to the correct value in the Auto-Negotiation
+	 * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
+	 * negotiation.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not support receiving pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) are enabled.
+	 *  other:  No software override.  The flow control configuration
+	 *          in the EEPROM is used.
+	 */
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		/* Flow control (Rx & Tx) is completely disabled by a
+		 * software over-ride.
+		 */
+		mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case e1000_fc_rx_pause:
+		/* Rx Flow control is enabled, and Tx Flow control is
+		 * disabled, by a software over-ride.
+		 *
+		 * Since there really isn't a way to advertise that we are
+		 * capable of Rx Pause ONLY, we will advertise that we
+		 * support both symmetric and asymmetric Rx PAUSE.  Later
+		 * (in e1000_config_fc_after_link_up) we will disable the
+		 * hw's ability to send PAUSE frames.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case e1000_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+		mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+		break;
+	case e1000_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	default:
+		DEBUGOUT("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL)
+		ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
+					     mii_1000t_ctrl_reg);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_copper_link_autoneg - Setup/Enable autoneg for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Performs initial bounds checking on autoneg advertisement parameter, then
+ *  configure to advertise the full capability.  Setup the PHY to autoneg
+ *  and restart the negotiation process between the link partner.  If
+ *  autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ **/
+static s32 e1000_copper_link_autoneg(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_ctrl;
+
+	DEBUGFUNC("e1000_copper_link_autoneg");
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (!phy->autoneg_advertised)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	DEBUGOUT("Reconfiguring auto-neg advertisement params\n");
+	ret_val = e1000_phy_setup_autoneg(hw);
+	if (ret_val) {
+		DEBUGOUT("Error Setting up Auto-Negotiation\n");
+		return ret_val;
+	}
+	DEBUGOUT("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, callback routine).
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = e1000_wait_autoneg(hw);
+		if (ret_val) {
+			DEBUGOUT("Error while waiting for autoneg to complete\n");
+			return ret_val;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_setup_copper_link_generic - Configure copper link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the appropriate function to configure the link for auto-neg or forced
+ *  speed and duplex.  Then we check for link, once link is established calls
+ *  to configure collision distance and flow control are called.  If link is
+ *  not established, we return -E1000_ERR_PHY (-2).
+ **/
+s32 e1000_setup_copper_link_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	bool link;
+
+	DEBUGFUNC("e1000_setup_copper_link_generic");
+
+	if (hw->mac.autoneg) {
+		/* Setup autoneg and flow control advertisement and perform
+		 * autonegotiation.
+		 */
+		ret_val = e1000_copper_link_autoneg(hw);
+		if (ret_val)
+			return ret_val;
+	} else {
+		/* PHY will be set to 10H, 10F, 100H or 100F
+		 * depending on user settings.
+		 */
+		DEBUGOUT("Forcing Speed and Duplex\n");
+		ret_val = hw->phy.ops.force_speed_duplex(hw);
+		if (ret_val) {
+			DEBUGOUT("Error Forcing Speed and Duplex\n");
+			return ret_val;
+		}
+	}
+
+	/* Check link status. Wait up to 100 microseconds for link to become
+	 * valid.
+	 */
+	ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10,
+					     &link);
+	if (ret_val)
+		return ret_val;
+
+	if (link) {
+		DEBUGOUT("Valid link established!!!\n");
+		hw->mac.ops.config_collision_dist(hw);
+		ret_val = e1000_config_fc_after_link_up_generic(hw);
+	} else {
+		DEBUGOUT("Unable to establish link!!!\n");
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Waits for link and returns
+ *  successful if link up is successful, else -E1000_ERR_PHY (-2).
+ **/
+s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	DEBUGFUNC("e1000_phy_force_speed_duplex_igp");
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Clear Auto-Crossover to force MDI manually.  IGP requires MDI
+	 * forced whenever speed and duplex are forced.
+	 */
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+	phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+
+	ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	DEBUGOUT1("IGP PSCR: %X\n", phy_data);
+
+	usec_delay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n");
+
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link)
+			DEBUGOUT("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Resets the PHY to commit the
+ *  changes.  If time expires while waiting for link up, we reset the DSP.
+ *  After reset, TX_CLK and CRS on Tx must be set.  Return successful upon
+ *  successful completion, else return corresponding error code.
+ **/
+s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	DEBUGFUNC("e1000_phy_force_speed_duplex_m88");
+
+	/* I210 and I211 devices support Auto-Crossover in forced operation. */
+	if (phy->type != e1000_phy_i210) {
+		/* Clear Auto-Crossover to force MDI manually.  M88E1000
+		 * requires MDI forced whenever speed and duplex are forced.
+		 */
+		ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL,
+					    &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+		ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL,
+					     phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data);
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Reset the phy to commit changes. */
+	ret_val = hw->phy.ops.commit(hw);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->autoneg_wait_to_complete) {
+		DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n");
+
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link) {
+			bool reset_dsp = true;
+
+			switch (hw->phy.id) {
+			case I347AT4_E_PHY_ID:
+			case M88E1340M_E_PHY_ID:
+			case M88E1112_E_PHY_ID:
+			case M88E1543_E_PHY_ID:
+			case I210_I_PHY_ID:
+				reset_dsp = false;
+				break;
+			default:
+				if (hw->phy.type != e1000_phy_m88)
+					reset_dsp = false;
+				break;
+			}
+
+			if (!reset_dsp) {
+				DEBUGOUT("Link taking longer than expected.\n");
+			} else {
+				/* We didn't get link.
+				 * Reset the DSP and cross our fingers.
+				 */
+				ret_val = phy->ops.write_reg(hw,
+						M88E1000_PHY_PAGE_SELECT,
+						0x001d);
+				if (ret_val)
+					return ret_val;
+				ret_val = e1000_phy_reset_dsp_generic(hw);
+				if (ret_val)
+					return ret_val;
+			}
+		}
+
+		/* Try once more */
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (hw->phy.type != e1000_phy_m88)
+		return E1000_SUCCESS;
+
+	if (hw->phy.id == I347AT4_E_PHY_ID ||
+		hw->phy.id == M88E1340M_E_PHY_ID ||
+		hw->phy.id == M88E1112_E_PHY_ID)
+		return E1000_SUCCESS;
+	if (hw->phy.id == I210_I_PHY_ID)
+		return E1000_SUCCESS;
+	if ((hw->phy.id == M88E1543_E_PHY_ID))
+		return E1000_SUCCESS;
+	ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Resetting the phy means we need to re-force TX_CLK in the
+	 * Extended PHY Specific Control Register to 25MHz clock from
+	 * the reset value of 2.5MHz.
+	 */
+	phy_data |= M88E1000_EPSCR_TX_CLK_25;
+	ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* In addition, we must re-enable CRS on Tx for both half and full
+	 * duplex.
+	 */
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+	ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex
+ *  @hw: pointer to the HW structure
+ *
+ *  Forces the speed and duplex settings of the PHY.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	DEBUGFUNC("e1000_phy_force_speed_duplex_ife");
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+
+	e1000_phy_force_speed_duplex_setup(hw, &data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data);
+	if (ret_val)
+		return ret_val;
+
+	/* Disable MDI-X support for 10/100 */
+	ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= ~IFE_PMC_AUTO_MDIX;
+	data &= ~IFE_PMC_FORCE_MDIX;
+
+	ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data);
+	if (ret_val)
+		return ret_val;
+
+	DEBUGOUT1("IFE PMC: %X\n", data);
+
+	usec_delay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n");
+
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link)
+			DEBUGOUT("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @phy_ctrl: pointer to current value of PHY_CONTROL
+ *
+ *  Forces speed and duplex on the PHY by doing the following: disable flow
+ *  control, force speed/duplex on the MAC, disable auto speed detection,
+ *  disable auto-negotiation, configure duplex, configure speed, configure
+ *  the collision distance, write configuration to CTRL register.  The
+ *  caller must write to the PHY_CONTROL register for these settings to
+ *  take affect.
+ **/
+void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 ctrl;
+
+	DEBUGFUNC("e1000_phy_force_speed_duplex_setup");
+
+	/* Turn off flow control when forcing speed/duplex */
+	hw->fc.current_mode = e1000_fc_none;
+
+	/* Force speed/duplex on the mac */
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+	ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ctrl &= ~E1000_CTRL_SPD_SEL;
+
+	/* Disable Auto Speed Detection */
+	ctrl &= ~E1000_CTRL_ASDE;
+
+	/* Disable autoneg on the phy */
+	*phy_ctrl &= ~MII_CR_AUTO_NEG_EN;
+
+	/* Forcing Full or Half Duplex? */
+	if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) {
+		ctrl &= ~E1000_CTRL_FD;
+		*phy_ctrl &= ~MII_CR_FULL_DUPLEX;
+		DEBUGOUT("Half Duplex\n");
+	} else {
+		ctrl |= E1000_CTRL_FD;
+		*phy_ctrl |= MII_CR_FULL_DUPLEX;
+		DEBUGOUT("Full Duplex\n");
+	}
+
+	/* Forcing 10mb or 100mb? */
+	if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) {
+		ctrl |= E1000_CTRL_SPD_100;
+		*phy_ctrl |= MII_CR_SPEED_100;
+		*phy_ctrl &= ~MII_CR_SPEED_1000;
+		DEBUGOUT("Forcing 100mb\n");
+	} else {
+		ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+		*phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
+		DEBUGOUT("Forcing 10mb\n");
+	}
+
+	hw->mac.ops.config_collision_dist(hw);
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+}
+
+/**
+ *  e1000_set_d3_lplu_state_generic - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D3
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.
+ **/
+s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	DEBUGFUNC("e1000_set_d3_lplu_state_generic");
+
+	if (!hw->phy.ops.read_reg)
+		return E1000_SUCCESS;
+
+	ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (!active) {
+		data &= ~IGP02E1000_PM_D3_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+					     data);
+		if (ret_val)
+			return ret_val;
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				return ret_val;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				return ret_val;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				return ret_val;
+		}
+	} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+		data |= IGP02E1000_PM_D3_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+					     data);
+		if (ret_val)
+			return ret_val;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					    &data);
+		if (ret_val)
+			return ret_val;
+
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					     data);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_downshift_generic - Checks whether a downshift in speed occurred
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  A downshift is detected by querying the PHY link health.
+ **/
+s32 e1000_check_downshift_generic(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, offset, mask;
+
+	DEBUGFUNC("e1000_check_downshift_generic");
+
+	switch (phy->type) {
+	case e1000_phy_i210:
+	case e1000_phy_m88:
+	case e1000_phy_gg82563:
+		offset = M88E1000_PHY_SPEC_STATUS;
+		mask = M88E1000_PSSR_DOWNSHIFT;
+		break;
+	case e1000_phy_igp_2:
+	case e1000_phy_igp_3:
+		offset = IGP01E1000_PHY_LINK_HEALTH;
+		mask = IGP01E1000_PLHR_SS_DOWNGRADE;
+		break;
+	default:
+		/* speed downshift not supported */
+		phy->speed_downgraded = false;
+		return E1000_SUCCESS;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->speed_downgraded = !!(phy_data & mask);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_polarity_m88 - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY specific status register.
+ **/
+s32 e1000_check_polarity_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	DEBUGFUNC("e1000_check_polarity_m88");
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_polarity_igp - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY port status register, and the
+ *  current speed (since there is no polarity at 100Mbps).
+ **/
+s32 e1000_check_polarity_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data, offset, mask;
+
+	DEBUGFUNC("e1000_check_polarity_igp");
+
+	/* Polarity is determined based on the speed of
+	 * our connection.
+	 */
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		offset = IGP01E1000_PHY_PCS_INIT_REG;
+		mask = IGP01E1000_PHY_POLARITY_MASK;
+	} else {
+		/* This really only applies to 10Mbps since
+		 * there is no polarity for 100Mbps (always 0).
+		 */
+		offset = IGP01E1000_PHY_PORT_STATUS;
+		mask = IGP01E1000_PSSR_POLARITY_REVERSED;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((data & mask)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_polarity_ife - Check cable polarity for IFE PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Polarity is determined on the polarity reversal feature being enabled.
+ **/
+s32 e1000_check_polarity_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, offset, mask;
+
+	DEBUGFUNC("e1000_check_polarity_ife");
+
+	/* Polarity is determined based on the reversal feature being enabled.
+	 */
+	if (phy->polarity_correction) {
+		offset = IFE_PHY_EXTENDED_STATUS_CONTROL;
+		mask = IFE_PESC_POLARITY_REVERSED;
+	} else {
+		offset = IFE_PHY_SPECIAL_CONTROL;
+		mask = IFE_PSC_FORCE_POLARITY;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((phy_data & mask)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_wait_autoneg - Wait for auto-neg completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Waits for auto-negotiation to complete or for the auto-negotiation time
+ *  limit to expire, which ever happens first.
+ **/
+static s32 e1000_wait_autoneg(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 i, phy_status;
+
+	DEBUGFUNC("e1000_wait_autoneg");
+
+	if (!hw->phy.ops.read_reg)
+		return E1000_SUCCESS;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_AUTONEG_COMPLETE)
+			break;
+		msec_delay(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_has_link_generic - Polls PHY for link
+ *  @hw: pointer to the HW structure
+ *  @iterations: number of times to poll for link
+ *  @usec_interval: delay between polling attempts
+ *  @success: pointer to whether polling was successful or not
+ *
+ *  Polls the PHY status register for link, 'iterations' number of times.
+ **/
+s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
+			       u32 usec_interval, bool *success)
+{
+	s32 ret_val = E1000_SUCCESS;
+	u16 i, phy_status;
+
+	DEBUGFUNC("e1000_phy_has_link_generic");
+
+	if (!hw->phy.ops.read_reg)
+		return E1000_SUCCESS;
+
+	for (i = 0; i < iterations; i++) {
+		/* Some PHYs require the PHY_STATUS register to be read
+		 * twice due to the link bit being sticky.  No harm doing
+		 * it across the board.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			usec_delay(usec_interval);
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_LINK_STATUS)
+			break;
+		if (usec_interval >= 1000)
+			msec_delay_irq(usec_interval/1000);
+		else
+			usec_delay(usec_interval);
+	}
+
+	*success = (i < iterations);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_cable_length_m88 - Determine cable length for m88 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY specific status register to retrieve the cable length
+ *  information.  The cable length is determined by averaging the minimum and
+ *  maximum values to get the "average" cable length.  The m88 PHY has four
+ *  possible cable length values, which are:
+ *	Register Value		Cable Length
+ *	0			< 50 meters
+ *	1			50 - 80 meters
+ *	2			80 - 110 meters
+ *	3			110 - 140 meters
+ *	4			> 140 meters
+ **/
+s32 e1000_get_cable_length_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, index;
+
+	DEBUGFUNC("e1000_get_cable_length_m88");
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+		 M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+
+	if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
+		return -E1000_ERR_PHY;
+
+	phy->min_cable_length = e1000_m88_cable_length_table[index];
+	phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+	return E1000_SUCCESS;
+}
+
+s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, phy_data2, is_cm;
+	u16 index, default_page;
+
+	DEBUGFUNC("e1000_get_cable_length_m88_gen2");
+
+	switch (hw->phy.id) {
+	case I210_I_PHY_ID:
+		/* Get cable length from PHY Cable Diagnostics Control Reg */
+		ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
+					    (I347AT4_PCDL + phy->addr),
+					    &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* Check if the unit of cable length is meters or cm */
+		ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) +
+					    I347AT4_PCDC, &phy_data2);
+		if (ret_val)
+			return ret_val;
+
+		is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+
+		/* Populate the phy structure with cable length in meters */
+		phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
+		phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
+		phy->cable_length = phy_data / (is_cm ? 100 : 1);
+		break;
+	case M88E1543_E_PHY_ID:
+	case M88E1340M_E_PHY_ID:
+	case I347AT4_E_PHY_ID:
+		/* Remember the original page select and set it to 7 */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+					    &default_page);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07);
+		if (ret_val)
+			return ret_val;
+
+		/* Get cable length from PHY Cable Diagnostics Control Reg */
+		ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr),
+					    &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* Check if the unit of cable length is meters or cm */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2);
+		if (ret_val)
+			return ret_val;
+
+		is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+
+		/* Populate the phy structure with cable length in meters */
+		phy->min_cable_length = phy_data / (is_cm ? 100 : 1);
+		phy->max_cable_length = phy_data / (is_cm ? 100 : 1);
+		phy->cable_length = phy_data / (is_cm ? 100 : 1);
+
+		/* Reset the page select to its original value */
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
+					     default_page);
+		if (ret_val)
+			return ret_val;
+		break;
+
+	case M88E1112_E_PHY_ID:
+		/* Remember the original page select and set it to 5 */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+					    &default_page);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE,
+					    &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+			M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+
+		if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
+			return -E1000_ERR_PHY;
+
+		phy->min_cable_length = e1000_m88_cable_length_table[index];
+		phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+		phy->cable_length = (phy->min_cable_length +
+				     phy->max_cable_length) / 2;
+
+		/* Reset the page select to its original value */
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
+					     default_page);
+		if (ret_val)
+			return ret_val;
+
+		break;
+	default:
+		return -E1000_ERR_PHY;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  The automatic gain control (agc) normalizes the amplitude of the
+ *  received signal, adjusting for the attenuation produced by the
+ *  cable.  By reading the AGC registers, which represent the
+ *  combination of coarse and fine gain value, the value can be put
+ *  into a lookup table to obtain the approximate cable length
+ *  for each channel.
+ **/
+s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, i, agc_value = 0;
+	u16 cur_agc_index, max_agc_index = 0;
+	u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1;
+	static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = {
+		IGP02E1000_PHY_AGC_A,
+		IGP02E1000_PHY_AGC_B,
+		IGP02E1000_PHY_AGC_C,
+		IGP02E1000_PHY_AGC_D
+	};
+
+	DEBUGFUNC("e1000_get_cable_length_igp_2");
+
+	/* Read the AGC registers for all channels */
+	for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
+		ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* Getting bits 15:9, which represent the combination of
+		 * coarse and fine gain values.  The result is a number
+		 * that can be put into the lookup table to obtain the
+		 * approximate cable length.
+		 */
+		cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
+				 IGP02E1000_AGC_LENGTH_MASK);
+
+		/* Array index bound check. */
+		if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) ||
+		    (cur_agc_index == 0))
+			return -E1000_ERR_PHY;
+
+		/* Remove min & max AGC values from calculation. */
+		if (e1000_igp_2_cable_length_table[min_agc_index] >
+		    e1000_igp_2_cable_length_table[cur_agc_index])
+			min_agc_index = cur_agc_index;
+		if (e1000_igp_2_cable_length_table[max_agc_index] <
+		    e1000_igp_2_cable_length_table[cur_agc_index])
+			max_agc_index = cur_agc_index;
+
+		agc_value += e1000_igp_2_cable_length_table[cur_agc_index];
+	}
+
+	agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] +
+		      e1000_igp_2_cable_length_table[max_agc_index]);
+	agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
+
+	/* Calculate cable length with the error range of +/- 10 meters. */
+	phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
+				 (agc_value - IGP02E1000_AGC_RANGE) : 0);
+	phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE;
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_phy_info_m88 - Retrieve PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Valid for only copper links.  Read the PHY status register (sticky read)
+ *  to verify that link is up.  Read the PHY special control register to
+ *  determine the polarity and 10base-T extended distance.  Read the PHY
+ *  special status register to determine MDI/MDIx and current speed.  If
+ *  speed is 1000, then determine cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32  ret_val;
+	u16 phy_data;
+	bool link;
+
+	DEBUGFUNC("e1000_get_phy_info_m88");
+
+	if (phy->media_type != e1000_media_type_copper) {
+		DEBUGOUT("Phy info is only valid for copper media\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		DEBUGOUT("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy->polarity_correction = !!(phy_data &
+				      M88E1000_PSCR_POLARITY_REVERSAL);
+
+	ret_val = e1000_check_polarity_m88(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX);
+
+	if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
+		ret_val = hw->phy.ops.get_cable_length(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS)
+				? e1000_1000t_rx_status_ok
+				: e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS)
+				 ? e1000_1000t_rx_status_ok
+				 : e1000_1000t_rx_status_not_ok;
+	} else {
+		/* Set values to "undefined" */
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_phy_info_igp - Retrieve igp PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Read PHY status to determine if link is up.  If link is up, then
+ *  set/determine 10base-T extended distance and polarity correction.  Read
+ *  PHY port status to determine MDI/MDIx and speed.  Based on the speed,
+ *  determine on the cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	DEBUGFUNC("e1000_get_phy_info_igp");
+
+	ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		DEBUGOUT("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	phy->polarity_correction = true;
+
+	ret_val = e1000_check_polarity_igp(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX);
+
+	if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		ret_val = phy->ops.get_cable_length(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+		if (ret_val)
+			return ret_val;
+
+		phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
+				? e1000_1000t_rx_status_ok
+				: e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
+				 ? e1000_1000t_rx_status_ok
+				 : e1000_1000t_rx_status_not_ok;
+	} else {
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_phy_info_ife - Retrieves various IFE PHY states
+ *  @hw: pointer to the HW structure
+ *
+ *  Populates "phy" structure with various feature states.
+ **/
+s32 e1000_get_phy_info_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	DEBUGFUNC("e1000_get_phy_info_ife");
+
+	ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		DEBUGOUT("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+	phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE);
+
+	if (phy->polarity_correction) {
+		ret_val = e1000_check_polarity_ife(hw);
+		if (ret_val)
+			return ret_val;
+	} else {
+		/* Polarity is forced */
+		phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+	}
+
+	ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS);
+
+	/* The following parameters are undefined for 10/100 operation. */
+	phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+	phy->local_rx = e1000_1000t_rx_status_undefined;
+	phy->remote_rx = e1000_1000t_rx_status_undefined;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_phy_sw_reset_generic - PHY software reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Does a software reset of the PHY by reading the PHY control register and
+ *  setting/write the control register reset bit to the PHY.
+ **/
+s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_ctrl;
+
+	DEBUGFUNC("e1000_phy_sw_reset_generic");
+
+	if (!hw->phy.ops.read_reg)
+		return E1000_SUCCESS;
+
+	ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	phy_ctrl |= MII_CR_RESET;
+	ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	usec_delay(1);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_hw_reset_generic - PHY hardware reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Verify the reset block is not blocking us from resetting.  Acquire
+ *  semaphore (if necessary) and read/set/write the device control reset
+ *  bit in the PHY.  Wait the appropriate delay time for the device to
+ *  reset and release the semaphore (if necessary).
+ **/
+s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u32 ctrl;
+
+	DEBUGFUNC("e1000_phy_hw_reset_generic");
+
+	if (phy->ops.check_reset_block) {
+		ret_val = phy->ops.check_reset_block(hw);
+		if (ret_val)
+			return E1000_SUCCESS;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ctrl = E1000_READ_REG(hw, E1000_CTRL);
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST);
+	E1000_WRITE_FLUSH(hw);
+
+	usec_delay(phy->reset_delay_us);
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+	E1000_WRITE_FLUSH(hw);
+
+	usec_delay(150);
+
+	phy->ops.release(hw);
+
+	return phy->ops.get_cfg_done(hw);
+}
+
+/**
+ *  e1000_get_cfg_done_generic - Generic configuration done
+ *  @hw: pointer to the HW structure
+ *
+ *  Generic function to wait 10 milli-seconds for configuration to complete
+ *  and return success.
+ **/
+s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw)
+{
+	DEBUGFUNC("e1000_get_cfg_done_generic");
+
+	msec_delay_irq(10);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_phy_init_script_igp3 - Inits the IGP3 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes a Intel Gigabit PHY3 when an EEPROM is not present.
+ **/
+s32 e1000_phy_init_script_igp3(struct e1000_hw *hw)
+{
+	DEBUGOUT("Running IGP 3 PHY init script\n");
+
+	/* PHY init IGP 3 */
+	/* Enable rise/fall, 10-mode work in class-A */
+	hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018);
+	/* Remove all caps from Replica path filter */
+	hw->phy.ops.write_reg(hw, 0x2F52, 0x0000);
+	/* Bias trimming for ADC, AFE and Driver (Default) */
+	hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24);
+	/* Increase Hybrid poly bias */
+	hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0);
+	/* Add 4% to Tx amplitude in Gig mode */
+	hw->phy.ops.write_reg(hw, 0x2010, 0x10B0);
+	/* Disable trimming (TTT) */
+	hw->phy.ops.write_reg(hw, 0x2011, 0x0000);
+	/* Poly DC correction to 94.6% + 2% for all channels */
+	hw->phy.ops.write_reg(hw, 0x20DD, 0x249A);
+	/* ABS DC correction to 95.9% */
+	hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3);
+	/* BG temp curve trim */
+	hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE);
+	/* Increasing ADC OPAMP stage 1 currents to max */
+	hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4);
+	/* Force 1000 ( required for enabling PHY regs configuration) */
+	hw->phy.ops.write_reg(hw, 0x0000, 0x0140);
+	/* Set upd_freq to 6 */
+	hw->phy.ops.write_reg(hw, 0x1F30, 0x1606);
+	/* Disable NPDFE */
+	hw->phy.ops.write_reg(hw, 0x1F31, 0xB814);
+	/* Disable adaptive fixed FFE (Default) */
+	hw->phy.ops.write_reg(hw, 0x1F35, 0x002A);
+	/* Enable FFE hysteresis */
+	hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067);
+	/* Fixed FFE for short cable lengths */
+	hw->phy.ops.write_reg(hw, 0x1F54, 0x0065);
+	/* Fixed FFE for medium cable lengths */
+	hw->phy.ops.write_reg(hw, 0x1F55, 0x002A);
+	/* Fixed FFE for long cable lengths */
+	hw->phy.ops.write_reg(hw, 0x1F56, 0x002A);
+	/* Enable Adaptive Clip Threshold */
+	hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0);
+	/* AHT reset limit to 1 */
+	hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF);
+	/* Set AHT master delay to 127 msec */
+	hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC);
+	/* Set scan bits for AHT */
+	hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF);
+	/* Set AHT Preset bits */
+	hw->phy.ops.write_reg(hw, 0x1F79, 0x0210);
+	/* Change integ_factor of channel A to 3 */
+	hw->phy.ops.write_reg(hw, 0x1895, 0x0003);
+	/* Change prop_factor of channels BCD to 8 */
+	hw->phy.ops.write_reg(hw, 0x1796, 0x0008);
+	/* Change cg_icount + enable integbp for channels BCD */
+	hw->phy.ops.write_reg(hw, 0x1798, 0xD008);
+	/* Change cg_icount + enable integbp + change prop_factor_master
+	 * to 8 for channel A
+	 */
+	hw->phy.ops.write_reg(hw, 0x1898, 0xD918);
+	/* Disable AHT in Slave mode on channel A */
+	hw->phy.ops.write_reg(hw, 0x187A, 0x0800);
+	/* Enable LPLU and disable AN to 1000 in non-D0a states,
+	 * Enable SPD+B2B
+	 */
+	hw->phy.ops.write_reg(hw, 0x0019, 0x008D);
+	/* Enable restart AN on an1000_dis change */
+	hw->phy.ops.write_reg(hw, 0x001B, 0x2080);
+	/* Enable wh_fifo read clock in 10/100 modes */
+	hw->phy.ops.write_reg(hw, 0x0014, 0x0045);
+	/* Restart AN, Speed selection is 1000 */
+	hw->phy.ops.write_reg(hw, 0x0000, 0x1340);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_phy_type_from_id - Get PHY type from id
+ *  @phy_id: phy_id read from the phy
+ *
+ *  Returns the phy type from the id.
+ **/
+enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id)
+{
+	enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+	switch (phy_id) {
+	case M88E1000_I_PHY_ID:
+	case M88E1000_E_PHY_ID:
+	case M88E1111_I_PHY_ID:
+	case M88E1011_I_PHY_ID:
+	case M88E1543_E_PHY_ID:
+	case I347AT4_E_PHY_ID:
+	case M88E1112_E_PHY_ID:
+	case M88E1340M_E_PHY_ID:
+		phy_type = e1000_phy_m88;
+		break;
+	case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */
+		phy_type = e1000_phy_igp_2;
+		break;
+	case GG82563_E_PHY_ID:
+		phy_type = e1000_phy_gg82563;
+		break;
+	case IGP03E1000_E_PHY_ID:
+		phy_type = e1000_phy_igp_3;
+		break;
+	case IFE_E_PHY_ID:
+	case IFE_PLUS_E_PHY_ID:
+	case IFE_C_E_PHY_ID:
+		phy_type = e1000_phy_ife;
+		break;
+	case I82580_I_PHY_ID:
+		phy_type = e1000_phy_82580;
+		break;
+	case I210_I_PHY_ID:
+		phy_type = e1000_phy_i210;
+		break;
+	default:
+		phy_type = e1000_phy_unknown;
+		break;
+	}
+	return phy_type;
+}
+
+/**
+ *  e1000_determine_phy_address - Determines PHY address.
+ *  @hw: pointer to the HW structure
+ *
+ *  This uses a trial and error method to loop through possible PHY
+ *  addresses. It tests each by reading the PHY ID registers and
+ *  checking for a match.
+ **/
+s32 e1000_determine_phy_address(struct e1000_hw *hw)
+{
+	u32 phy_addr = 0;
+	u32 i;
+	enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+	hw->phy.id = phy_type;
+
+	for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) {
+		hw->phy.addr = phy_addr;
+		i = 0;
+
+		do {
+			e1000_get_phy_id(hw);
+			phy_type = e1000_get_phy_type_from_id(hw->phy.id);
+
+			/* If phy_type is valid, break - we found our
+			 * PHY address
+			 */
+			if (phy_type != e1000_phy_unknown)
+				return E1000_SUCCESS;
+
+			msec_delay(1);
+			i++;
+		} while (i < 10);
+	}
+
+	return -E1000_ERR_PHY_TYPE;
+}
+
+/**
+ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, restore the link to previous
+ * settings.
+ **/
+void e1000_power_up_phy_copper(struct e1000_hw *hw)
+{
+	u16 mii_reg = 0;
+	u16 power_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg &= ~MII_CR_POWER_DOWN;
+	if (hw->phy.type == e1000_phy_i210) {
+		hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
+		power_reg &= ~GS40G_CS_POWER_DOWN;
+		hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
+	}
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, restore the link to previous
+ * settings.
+ **/
+void e1000_power_down_phy_copper(struct e1000_hw *hw)
+{
+	u16 mii_reg = 0;
+	u16 power_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg |= MII_CR_POWER_DOWN;
+	/* i210 Phy requires an additional bit for power up/down */
+	if (hw->phy.type == e1000_phy_i210) {
+		hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg);
+		power_reg |= GS40G_CS_POWER_DOWN;
+		hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg);
+	}
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+	msec_delay(1);
+}
+
+/**
+ *  e1000_check_polarity_82577 - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY specific status register.
+ **/
+s32 e1000_check_polarity_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	DEBUGFUNC("e1000_check_polarity_82577");
+
+	ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.
+ **/
+s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	DEBUGFUNC("e1000_phy_force_speed_duplex_82577");
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	usec_delay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n");
+
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link)
+			DEBUGOUT("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						     100000, &link);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_phy_info_82577 - Retrieve I82577 PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Read PHY status to determine if link is up.  If link is up, then
+ *  set/determine 10base-T extended distance and polarity correction.  Read
+ *  PHY port status to determine MDI/MDIx and speed.  Based on the speed,
+ *  determine on the cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	DEBUGFUNC("e1000_get_phy_info_82577");
+
+	ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		DEBUGOUT("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	phy->polarity_correction = true;
+
+	ret_val = e1000_check_polarity_82577(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX);
+
+	if ((data & I82577_PHY_STATUS2_SPEED_MASK) ==
+	    I82577_PHY_STATUS2_SPEED_1000MBPS) {
+		ret_val = hw->phy.ops.get_cable_length(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+		if (ret_val)
+			return ret_val;
+
+		phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
+				? e1000_1000t_rx_status_ok
+				: e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
+				 ? e1000_1000t_rx_status_ok
+				 : e1000_1000t_rx_status_not_ok;
+	} else {
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_get_cable_length_82577 - Determine cable length for 82577 PHY
+ *  @hw: pointer to the HW structure
+ *
+ * Reads the diagnostic status register and verifies result is valid before
+ * placing it in the phy_cable_length field.
+ **/
+s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, length;
+
+	DEBUGFUNC("e1000_get_cable_length_82577");
+
+	ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >>
+		  I82577_DSTATUS_CABLE_LENGTH_SHIFT);
+
+	if (length == E1000_CABLE_LENGTH_UNDEFINED)
+		return -E1000_ERR_PHY;
+
+	phy->cable_length = length;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_phy_reg_gs40g - Write GS40G  PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+	u16 page = offset >> GS40G_PAGE_SHIFT;
+
+	DEBUGFUNC("e1000_write_phy_reg_gs40g");
+
+	offset = offset & GS40G_OFFSET_MASK;
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
+	if (ret_val)
+		goto release;
+	ret_val = e1000_write_phy_reg_mdic(hw, offset, data);
+
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000_read_phy_reg_gs40g - Read GS40G  PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: lower half is register offset to read to
+ *     upper half is page to use.
+ *  @data: data to read at register offset
+ *
+ *  Acquires semaphore, if necessary, then reads the data in the PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+	u16 page = offset >> GS40G_PAGE_SHIFT;
+
+	DEBUGFUNC("e1000_read_phy_reg_gs40g");
+
+	offset = offset & GS40G_OFFSET_MASK;
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page);
+	if (ret_val)
+		goto release;
+	ret_val = e1000_read_phy_reg_mdic(hw, offset, data);
+
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000_read_phy_reg_mphy - Read mPHY control register
+ *  @hw: pointer to the HW structure
+ *  @address: address to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the mPHY control register in the PHY at offset and stores the
+ *  information read to data.
+ **/
+s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
+{
+	u32 mphy_ctrl = 0;
+	bool locked = false;
+	bool ready = false;
+
+	DEBUGFUNC("e1000_read_phy_reg_mphy");
+
+	/* Check if mPHY is ready to read/write operations */
+	ready = e1000_is_mphy_ready(hw);
+	if (!ready)
+		return -E1000_ERR_PHY;
+
+	/* Check if mPHY access is disabled and enable it if so */
+	mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
+	if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) {
+		locked = true;
+		ready = e1000_is_mphy_ready(hw);
+		if (!ready)
+			return -E1000_ERR_PHY;
+		mphy_ctrl |= E1000_MPHY_ENA_ACCESS;
+		E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+	}
+
+	/* Set the address that we want to read */
+	ready = e1000_is_mphy_ready(hw);
+	if (!ready)
+		return -E1000_ERR_PHY;
+
+	/* We mask address, because we want to use only current lane */
+	mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK &
+		~E1000_MPHY_ADDRESS_FNC_OVERRIDE) |
+		(address & E1000_MPHY_ADDRESS_MASK);
+	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+
+	/* Read data from the address */
+	ready = e1000_is_mphy_ready(hw);
+	if (!ready)
+		return -E1000_ERR_PHY;
+	*data = E1000_READ_REG(hw, E1000_MPHY_DATA);
+
+	/* Disable access to mPHY if it was originally disabled */
+	if (locked)
+		ready = e1000_is_mphy_ready(hw);
+		if (!ready)
+			return -E1000_ERR_PHY;
+		E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+				E1000_MPHY_DIS_ACCESS);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_write_phy_reg_mphy - Write mPHY control register
+ *  @hw: pointer to the HW structure
+ *  @address: address to write to
+ *  @data: data to write to register at offset
+ *  @line_override: used when we want to use different line than default one
+ *
+ *  Writes data to mPHY control register.
+ **/
+s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
+			     bool line_override)
+{
+	u32 mphy_ctrl = 0;
+	bool locked = false;
+	bool ready = false;
+
+	DEBUGFUNC("e1000_write_phy_reg_mphy");
+
+	/* Check if mPHY is ready to read/write operations */
+	ready = e1000_is_mphy_ready(hw);
+	if (!ready)
+		return -E1000_ERR_PHY;
+
+	/* Check if mPHY access is disabled and enable it if so */
+	mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
+	if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) {
+		locked = true;
+		ready = e1000_is_mphy_ready(hw);
+		if (!ready)
+			return -E1000_ERR_PHY;
+		mphy_ctrl |= E1000_MPHY_ENA_ACCESS;
+		E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+	}
+
+	/* Set the address that we want to read */
+	ready = e1000_is_mphy_ready(hw);
+	if (!ready)
+		return -E1000_ERR_PHY;
+
+	/* We mask address, because we want to use only current lane */
+	if (line_override)
+		mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE;
+	else
+		mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE;
+	mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) |
+		(address & E1000_MPHY_ADDRESS_MASK);
+	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl);
+
+	/* Read data from the address */
+	ready = e1000_is_mphy_ready(hw);
+	if (!ready)
+		return -E1000_ERR_PHY;
+	E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
+
+	/* Disable access to mPHY if it was originally disabled */
+	if (locked)
+		ready = e1000_is_mphy_ready(hw);
+		if (!ready)
+			return -E1000_ERR_PHY;
+		E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+				E1000_MPHY_DIS_ACCESS);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_is_mphy_ready - Check if mPHY control register is not busy
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns mPHY control register status.
+ **/
+bool e1000_is_mphy_ready(struct e1000_hw *hw)
+{
+	u16 retry_count = 0;
+	u32 mphy_ctrl = 0;
+	bool ready = false;
+
+	while (retry_count < 2) {
+		mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL);
+		if (mphy_ctrl & E1000_MPHY_BUSY) {
+			usec_delay(20);
+			retry_count++;
+			continue;
+		}
+		ready = true;
+		break;
+	}
+
+	if (!ready)
+		DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n");
+
+	return ready;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
new file mode 100644
index 00000000..5387c5e7
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
@@ -0,0 +1,256 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_PHY_H_
+#define _E1000_PHY_H_
+
+void e1000_init_phy_ops_generic(struct e1000_hw *hw);
+s32  e1000_null_read_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+void e1000_null_phy_generic(struct e1000_hw *hw);
+s32  e1000_null_lplu_state(struct e1000_hw *hw, bool active);
+s32  e1000_null_write_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_null_set_page(struct e1000_hw *hw, u16 data);
+s32 e1000_read_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset,
+			     u8 dev_addr, u8 *data);
+s32 e1000_write_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset,
+			      u8 dev_addr, u8 data);
+s32  e1000_check_downshift_generic(struct e1000_hw *hw);
+s32  e1000_check_polarity_m88(struct e1000_hw *hw);
+s32  e1000_check_polarity_igp(struct e1000_hw *hw);
+s32  e1000_check_polarity_ife(struct e1000_hw *hw);
+s32  e1000_check_reset_block_generic(struct e1000_hw *hw);
+s32  e1000_copper_link_setup_igp(struct e1000_hw *hw);
+s32  e1000_copper_link_setup_m88(struct e1000_hw *hw);
+s32  e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw);
+s32  e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw);
+s32  e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw);
+s32  e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw);
+s32  e1000_get_cable_length_m88(struct e1000_hw *hw);
+s32  e1000_get_cable_length_m88_gen2(struct e1000_hw *hw);
+s32  e1000_get_cable_length_igp_2(struct e1000_hw *hw);
+s32  e1000_get_cfg_done_generic(struct e1000_hw *hw);
+s32  e1000_get_phy_id(struct e1000_hw *hw);
+s32  e1000_get_phy_info_igp(struct e1000_hw *hw);
+s32  e1000_get_phy_info_m88(struct e1000_hw *hw);
+s32  e1000_get_phy_info_ife(struct e1000_hw *hw);
+s32  e1000_phy_sw_reset_generic(struct e1000_hw *hw);
+void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl);
+s32  e1000_phy_hw_reset_generic(struct e1000_hw *hw);
+s32  e1000_phy_reset_dsp_generic(struct e1000_hw *hw);
+s32  e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_set_page_igp(struct e1000_hw *hw, u16 page);
+s32  e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active);
+s32  e1000_setup_copper_link_generic(struct e1000_hw *hw);
+s32  e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
+				u32 usec_interval, bool *success);
+s32  e1000_phy_init_script_igp3(struct e1000_hw *hw);
+enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id);
+s32  e1000_determine_phy_address(struct e1000_hw *hw);
+s32  e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
+s32  e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
+void e1000_power_up_phy_copper(struct e1000_hw *hw);
+void e1000_power_down_phy_copper(struct e1000_hw *hw);
+s32  e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data);
+s32  e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data);
+s32  e1000_copper_link_setup_82577(struct e1000_hw *hw);
+s32  e1000_check_polarity_82577(struct e1000_hw *hw);
+s32  e1000_get_phy_info_82577(struct e1000_hw *hw);
+s32  e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw);
+s32  e1000_get_cable_length_82577(struct e1000_hw *hw);
+s32  e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data);
+s32  e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data);
+s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
+			     bool line_override);
+bool e1000_is_mphy_ready(struct e1000_hw *hw);
+
+#define E1000_MAX_PHY_ADDR		8
+
+/* IGP01E1000 Specific Registers */
+#define IGP01E1000_PHY_PORT_CONFIG	0x10 /* Port Config */
+#define IGP01E1000_PHY_PORT_STATUS	0x11 /* Status */
+#define IGP01E1000_PHY_PORT_CTRL	0x12 /* Control */
+#define IGP01E1000_PHY_LINK_HEALTH	0x13 /* PHY Link Health */
+#define IGP02E1000_PHY_POWER_MGMT	0x19 /* Power Management */
+#define IGP01E1000_PHY_PAGE_SELECT	0x1F /* Page Select */
+#define BM_PHY_PAGE_SELECT		22   /* Page Select for BM */
+#define IGP_PAGE_SHIFT			5
+#define PHY_REG_MASK			0x1F
+
+/* GS40G - I210 PHY defines */
+#define GS40G_PAGE_SELECT		0x16
+#define GS40G_PAGE_SHIFT		16
+#define GS40G_OFFSET_MASK		0xFFFF
+#define GS40G_PAGE_2			0x20000
+#define GS40G_MAC_REG2			0x15
+#define GS40G_MAC_LB			0x4140
+#define GS40G_MAC_SPEED_1G		0X0006
+#define GS40G_COPPER_SPEC		0x0010
+#define GS40G_CS_POWER_DOWN		0x0002
+
+#define HV_INTC_FC_PAGE_START		768
+#define I82578_ADDR_REG			29
+#define I82577_ADDR_REG			16
+#define I82577_CFG_REG			22
+#define I82577_CFG_ASSERT_CRS_ON_TX	(1 << 15)
+#define I82577_CFG_ENABLE_DOWNSHIFT	(3 << 10) /* auto downshift */
+#define I82577_CTRL_REG			23
+
+/* 82577 specific PHY registers */
+#define I82577_PHY_CTRL_2		18
+#define I82577_PHY_LBK_CTRL		19
+#define I82577_PHY_STATUS_2		26
+#define I82577_PHY_DIAG_STATUS		31
+
+/* I82577 PHY Status 2 */
+#define I82577_PHY_STATUS2_REV_POLARITY		0x0400
+#define I82577_PHY_STATUS2_MDIX			0x0800
+#define I82577_PHY_STATUS2_SPEED_MASK		0x0300
+#define I82577_PHY_STATUS2_SPEED_1000MBPS	0x0200
+
+/* I82577 PHY Control 2 */
+#define I82577_PHY_CTRL2_MANUAL_MDIX		0x0200
+#define I82577_PHY_CTRL2_AUTO_MDI_MDIX		0x0400
+#define I82577_PHY_CTRL2_MDIX_CFG_MASK		0x0600
+
+/* I82577 PHY Diagnostics Status */
+#define I82577_DSTATUS_CABLE_LENGTH		0x03FC
+#define I82577_DSTATUS_CABLE_LENGTH_SHIFT	2
+
+/* 82580 PHY Power Management */
+#define E1000_82580_PHY_POWER_MGMT	0xE14
+#define E1000_82580_PM_SPD		0x0001 /* Smart Power Down */
+#define E1000_82580_PM_D0_LPLU		0x0002 /* For D0a states */
+#define E1000_82580_PM_D3_LPLU		0x0004 /* For all other states */
+#define E1000_82580_PM_GO_LINKD		0x0020 /* Go Link Disconnect */
+
+#define E1000_MPHY_DIS_ACCESS		0x80000000 /* disable_access bit */
+#define E1000_MPHY_ENA_ACCESS		0x40000000 /* enable_access bit */
+#define E1000_MPHY_BUSY			0x00010000 /* busy bit */
+#define E1000_MPHY_ADDRESS_FNC_OVERRIDE	0x20000000 /* fnc_override bit */
+#define E1000_MPHY_ADDRESS_MASK		0x0000FFFF /* address mask */
+
+#define IGP01E1000_PHY_PCS_INIT_REG	0x00B4
+#define IGP01E1000_PHY_POLARITY_MASK	0x0078
+
+#define IGP01E1000_PSCR_AUTO_MDIX	0x1000
+#define IGP01E1000_PSCR_FORCE_MDI_MDIX	0x2000 /* 0=MDI, 1=MDIX */
+
+#define IGP01E1000_PSCFR_SMART_SPEED	0x0080
+
+#define IGP02E1000_PM_SPD		0x0001 /* Smart Power Down */
+#define IGP02E1000_PM_D0_LPLU		0x0002 /* For D0a states */
+#define IGP02E1000_PM_D3_LPLU		0x0004 /* For all other states */
+
+#define IGP01E1000_PLHR_SS_DOWNGRADE	0x8000
+
+#define IGP01E1000_PSSR_POLARITY_REVERSED	0x0002
+#define IGP01E1000_PSSR_MDIX		0x0800
+#define IGP01E1000_PSSR_SPEED_MASK	0xC000
+#define IGP01E1000_PSSR_SPEED_1000MBPS	0xC000
+
+#define IGP02E1000_PHY_CHANNEL_NUM	4
+#define IGP02E1000_PHY_AGC_A		0x11B1
+#define IGP02E1000_PHY_AGC_B		0x12B1
+#define IGP02E1000_PHY_AGC_C		0x14B1
+#define IGP02E1000_PHY_AGC_D		0x18B1
+
+#define IGP02E1000_AGC_LENGTH_SHIFT	9   /* Course=15:13, Fine=12:9 */
+#define IGP02E1000_AGC_LENGTH_MASK	0x7F
+#define IGP02E1000_AGC_RANGE		15
+
+#define E1000_CABLE_LENGTH_UNDEFINED	0xFF
+
+#define E1000_KMRNCTRLSTA_OFFSET	0x001F0000
+#define E1000_KMRNCTRLSTA_OFFSET_SHIFT	16
+#define E1000_KMRNCTRLSTA_REN		0x00200000
+#define E1000_KMRNCTRLSTA_DIAG_OFFSET	0x3    /* Kumeran Diagnostic */
+#define E1000_KMRNCTRLSTA_TIMEOUTS	0x4    /* Kumeran Timeouts */
+#define E1000_KMRNCTRLSTA_INBAND_PARAM	0x9    /* Kumeran InBand Parameters */
+#define E1000_KMRNCTRLSTA_IBIST_DISABLE	0x0200 /* Kumeran IBIST Disable */
+#define E1000_KMRNCTRLSTA_DIAG_NELPBK	0x1000 /* Nearend Loopback mode */
+
+#define IFE_PHY_EXTENDED_STATUS_CONTROL	0x10
+#define IFE_PHY_SPECIAL_CONTROL		0x11 /* 100BaseTx PHY Special Ctrl */
+#define IFE_PHY_SPECIAL_CONTROL_LED	0x1B /* PHY Special and LED Ctrl */
+#define IFE_PHY_MDIX_CONTROL		0x1C /* MDI/MDI-X Control */
+
+/* IFE PHY Extended Status Control */
+#define IFE_PESC_POLARITY_REVERSED	0x0100
+
+/* IFE PHY Special Control */
+#define IFE_PSC_AUTO_POLARITY_DISABLE	0x0010
+#define IFE_PSC_FORCE_POLARITY		0x0020
+
+/* IFE PHY Special Control and LED Control */
+#define IFE_PSCL_PROBE_MODE		0x0020
+#define IFE_PSCL_PROBE_LEDS_OFF		0x0006 /* Force LEDs 0 and 2 off */
+#define IFE_PSCL_PROBE_LEDS_ON		0x0007 /* Force LEDs 0 and 2 on */
+
+/* IFE PHY MDIX Control */
+#define IFE_PMC_MDIX_STATUS		0x0020 /* 1=MDI-X, 0=MDI */
+#define IFE_PMC_FORCE_MDIX		0x0040 /* 1=force MDI-X, 0=force MDI */
+#define IFE_PMC_AUTO_MDIX		0x0080 /* 1=enable auto, 0=disable */
+
+/* SFP modules ID memory locations */
+#define E1000_SFF_IDENTIFIER_OFFSET	0x00
+#define E1000_SFF_IDENTIFIER_SFF	0x02
+#define E1000_SFF_IDENTIFIER_SFP	0x03
+
+#define E1000_SFF_ETH_FLAGS_OFFSET	0x06
+/* Flags for SFP modules compatible with ETH up to 1Gb */
+struct sfp_e1000_flags {
+	u8 e1000_base_sx:1;
+	u8 e1000_base_lx:1;
+	u8 e1000_base_cx:1;
+	u8 e1000_base_t:1;
+	u8 e100_base_lx:1;
+	u8 e100_base_fx:1;
+	u8 e10_base_bx10:1;
+	u8 e10_base_px:1;
+};
+
+/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
+#define E1000_SFF_VENDOR_OUI_TYCO	0x00407600
+#define E1000_SFF_VENDOR_OUI_FTL	0x00906500
+#define E1000_SFF_VENDOR_OUI_AVAGO	0x00176A00
+#define E1000_SFF_VENDOR_OUI_INTEL	0x001B2100
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
new file mode 100644
index 00000000..0e083c54
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
@@ -0,0 +1,646 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _E1000_REGS_H_
+#define _E1000_REGS_H_
+
+#define E1000_CTRL	0x00000  /* Device Control - RW */
+#define E1000_STATUS	0x00008  /* Device Status - RO */
+#define E1000_EECD	0x00010  /* EEPROM/Flash Control - RW */
+#define E1000_EERD	0x00014  /* EEPROM Read - RW */
+#define E1000_CTRL_EXT	0x00018  /* Extended Device Control - RW */
+#define E1000_FLA	0x0001C  /* Flash Access - RW */
+#define E1000_MDIC	0x00020  /* MDI Control - RW */
+#define E1000_MDICNFG	0x00E04  /* MDI Config - RW */
+#define E1000_REGISTER_SET_SIZE		0x20000 /* CSR Size */
+#define E1000_EEPROM_INIT_CTRL_WORD_2	0x0F /* EEPROM Init Ctrl Word 2 */
+#define E1000_EEPROM_PCIE_CTRL_WORD_2	0x28 /* EEPROM PCIe Ctrl Word 2 */
+#define E1000_BARCTRL			0x5BBC /* BAR ctrl reg */
+#define E1000_BARCTRL_FLSIZE		0x0700 /* BAR ctrl Flsize */
+#define E1000_BARCTRL_CSRSIZE		0x2000 /* BAR ctrl CSR size */
+#define E1000_MPHY_ADDR_CTRL	0x0024 /* GbE MPHY Address Control */
+#define E1000_MPHY_DATA		0x0E10 /* GBE MPHY Data */
+#define E1000_MPHY_STAT		0x0E0C /* GBE MPHY Statistics */
+#define E1000_PPHY_CTRL		0x5b48 /* PCIe PHY Control */
+#define E1000_I350_BARCTRL		0x5BFC /* BAR ctrl reg */
+#define E1000_I350_DTXMXPKTSZ		0x355C /* Maximum sent packet size reg*/
+#define E1000_SCTL	0x00024  /* SerDes Control - RW */
+#define E1000_FCAL	0x00028  /* Flow Control Address Low - RW */
+#define E1000_FCAH	0x0002C  /* Flow Control Address High -RW */
+#define E1000_FCT	0x00030  /* Flow Control Type - RW */
+#define E1000_CONNSW	0x00034  /* Copper/Fiber switch control - RW */
+#define E1000_VET	0x00038  /* VLAN Ether Type - RW */
+#define E1000_ICR	0x000C0  /* Interrupt Cause Read - R/clr */
+#define E1000_ITR	0x000C4  /* Interrupt Throttling Rate - RW */
+#define E1000_ICS	0x000C8  /* Interrupt Cause Set - WO */
+#define E1000_IMS	0x000D0  /* Interrupt Mask Set - RW */
+#define E1000_IMC	0x000D8  /* Interrupt Mask Clear - WO */
+#define E1000_IAM	0x000E0  /* Interrupt Acknowledge Auto Mask */
+#define E1000_RCTL	0x00100  /* Rx Control - RW */
+#define E1000_FCTTV	0x00170  /* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW	0x00178  /* Tx Configuration Word - RW */
+#define E1000_RXCW	0x00180  /* Rx Configuration Word - RO */
+#define E1000_EICR	0x01580  /* Ext. Interrupt Cause Read - R/clr */
+#define E1000_EITR(_n)	(0x01680 + (0x4 * (_n)))
+#define E1000_EICS	0x01520  /* Ext. Interrupt Cause Set - W0 */
+#define E1000_EIMS	0x01524  /* Ext. Interrupt Mask Set/Read - RW */
+#define E1000_EIMC	0x01528  /* Ext. Interrupt Mask Clear - WO */
+#define E1000_EIAC	0x0152C  /* Ext. Interrupt Auto Clear - RW */
+#define E1000_EIAM	0x01530  /* Ext. Interrupt Ack Auto Clear Mask - RW */
+#define E1000_GPIE	0x01514  /* General Purpose Interrupt Enable - RW */
+#define E1000_IVAR0	0x01700  /* Interrupt Vector Allocation (array) - RW */
+#define E1000_IVAR_MISC	0x01740 /* IVAR for "other" causes - RW */
+#define E1000_TCTL	0x00400  /* Tx Control - RW */
+#define E1000_TCTL_EXT	0x00404  /* Extended Tx Control - RW */
+#define E1000_TIPG	0x00410  /* Tx Inter-packet gap -RW */
+#define E1000_AIT	0x00458  /* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL	0x00E00  /* LED Control - RW */
+#define E1000_LEDMUX	0x08130  /* LED MUX Control */
+#define E1000_EXTCNF_CTRL	0x00F00  /* Extended Configuration Control */
+#define E1000_EXTCNF_SIZE	0x00F08  /* Extended Configuration Size */
+#define E1000_PHY_CTRL	0x00F10  /* PHY Control Register in CSR */
+#define E1000_PBA	0x01000  /* Packet Buffer Allocation - RW */
+#define E1000_PBS	0x01008  /* Packet Buffer Size */
+#define E1000_EEMNGCTL	0x01010  /* MNG EEprom Control */
+#define E1000_EEARBC	0x01024  /* EEPROM Auto Read Bus Control */
+#define E1000_EEWR	0x0102C  /* EEPROM Write Register - RW */
+#define E1000_FLOP	0x0103C  /* FLASH Opcode Register */
+#define E1000_I2CCMD	0x01028  /* SFPI2C Command Register - RW */
+#define E1000_I2CPARAMS	0x0102C /* SFPI2C Parameters Register - RW */
+#define E1000_I2CBB_EN	0x00000100  /* I2C - Bit Bang Enable */
+#define E1000_I2C_CLK_OUT	0x00000200  /* I2C- Clock */
+#define E1000_I2C_DATA_OUT	0x00000400  /* I2C- Data Out */
+#define E1000_I2C_DATA_OE_N	0x00000800  /* I2C- Data Output Enable */
+#define E1000_I2C_DATA_IN	0x00001000  /* I2C- Data In */
+#define E1000_I2C_CLK_OE_N	0x00002000  /* I2C- Clock Output Enable */
+#define E1000_I2C_CLK_IN	0x00004000  /* I2C- Clock In */
+#define E1000_I2C_CLK_STRETCH_DIS	0x00008000 /* I2C- Dis Clk Stretching */
+#define E1000_WDSTP	0x01040  /* Watchdog Setup - RW */
+#define E1000_SWDSTS	0x01044  /* SW Device Status - RW */
+#define E1000_FRTIMER	0x01048  /* Free Running Timer - RW */
+#define E1000_TCPTIMER	0x0104C  /* TCP Timer - RW */
+#define E1000_VPDDIAG	0x01060  /* VPD Diagnostic - RO */
+#define E1000_ICR_V2	0x01500  /* Intr Cause - new location - RC */
+#define E1000_ICS_V2	0x01504  /* Intr Cause Set - new location - WO */
+#define E1000_IMS_V2	0x01508  /* Intr Mask Set/Read - new location - RW */
+#define E1000_IMC_V2	0x0150C  /* Intr Mask Clear - new location - WO */
+#define E1000_IAM_V2	0x01510  /* Intr Ack Auto Mask - new location - RW */
+#define E1000_ERT	0x02008  /* Early Rx Threshold - RW */
+#define E1000_FCRTL	0x02160  /* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH	0x02168  /* Flow Control Receive Threshold High - RW */
+#define E1000_PSRCTL	0x02170  /* Packet Split Receive Control - RW */
+#define E1000_RDFH	0x02410  /* Rx Data FIFO Head - RW */
+#define E1000_RDFT	0x02418  /* Rx Data FIFO Tail - RW */
+#define E1000_RDFHS	0x02420  /* Rx Data FIFO Head Saved - RW */
+#define E1000_RDFTS	0x02428  /* Rx Data FIFO Tail Saved - RW */
+#define E1000_RDFPC	0x02430  /* Rx Data FIFO Packet Count - RW */
+#define E1000_PBRTH	0x02458  /* PB Rx Arbitration Threshold - RW */
+#define E1000_FCRTV	0x02460  /* Flow Control Refresh Timer Value - RW */
+/* Split and Replication Rx Control - RW */
+#define E1000_RDPUMB	0x025CC  /* DMA Rx Descriptor uC Mailbox - RW */
+#define E1000_RDPUAD	0x025D0  /* DMA Rx Descriptor uC Addr Command - RW */
+#define E1000_RDPUWD	0x025D4  /* DMA Rx Descriptor uC Data Write - RW */
+#define E1000_RDPURD	0x025D8  /* DMA Rx Descriptor uC Data Read - RW */
+#define E1000_RDPUCTL	0x025DC  /* DMA Rx Descriptor uC Control - RW */
+#define E1000_PBDIAG	0x02458  /* Packet Buffer Diagnostic - RW */
+#define E1000_RXPBS	0x02404  /* Rx Packet Buffer Size - RW */
+#define E1000_IRPBS	0x02404 /* Same as RXPBS, renamed for newer Si - RW */
+#define E1000_PBRWAC	0x024E8 /* Rx packet buffer wrap around counter - RO */
+#define E1000_RDTR	0x02820  /* Rx Delay Timer - RW */
+#define E1000_RADV	0x0282C  /* Rx Interrupt Absolute Delay Timer - RW */
+#define E1000_EMIADD	0x10     /* Extended Memory Indirect Address */
+#define E1000_EMIDATA	0x11     /* Extended Memory Indirect Data */
+#define E1000_SRWR		0x12018  /* Shadow Ram Write Register - RW */
+#define E1000_I210_FLMNGCTL	0x12038
+#define E1000_I210_FLMNGDATA	0x1203C
+#define E1000_I210_FLMNGCNT	0x12040
+
+#define E1000_I210_FLSWCTL	0x12048
+#define E1000_I210_FLSWDATA	0x1204C
+#define E1000_I210_FLSWCNT	0x12050
+
+#define E1000_I210_FLA		0x1201C
+
+#define E1000_INVM_DATA_REG(_n)	(0x12120 + 4*(_n))
+#define E1000_INVM_SIZE		64 /* Number of INVM Data Registers */
+
+/* QAV Tx mode control register */
+#define E1000_I210_TQAVCTRL	0x3570
+
+/* QAV Tx mode control register bitfields masks */
+/* QAV enable */
+#define E1000_TQAVCTRL_MODE			(1 << 0)
+/* Fetching arbitration type */
+#define E1000_TQAVCTRL_FETCH_ARB		(1 << 4)
+/* Fetching timer enable */
+#define E1000_TQAVCTRL_FETCH_TIMER_ENABLE	(1 << 5)
+/* Launch arbitration type */
+#define E1000_TQAVCTRL_LAUNCH_ARB		(1 << 8)
+/* Launch timer enable */
+#define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE	(1 << 9)
+/* SP waits for SR enable */
+#define E1000_TQAVCTRL_SP_WAIT_SR		(1 << 10)
+/* Fetching timer correction */
+#define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET	16
+#define E1000_TQAVCTRL_FETCH_TIMER_DELTA	\
+			(0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET)
+
+/* High credit registers where _n can be 0 or 1. */
+#define E1000_I210_TQAVHC(_n)			(0x300C + 0x40 * (_n))
+
+/* Queues fetch arbitration priority control register */
+#define E1000_I210_TQAVARBCTRL			0x3574
+/* Queues priority masks where _n and _p can be 0-3. */
+#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p)	((_p) << (2 * _n))
+/* QAV Tx mode control registers where _n can be 0 or 1. */
+#define E1000_I210_TQAVCC(_n)			(0x3004 + 0x40 * (_n))
+
+/* QAV Tx mode control register bitfields masks */
+#define E1000_TQAVCC_IDLE_SLOPE		0xFFFF /* Idle slope */
+#define E1000_TQAVCC_KEEP_CREDITS	(1 << 30) /* Keep credits opt enable */
+#define E1000_TQAVCC_QUEUE_MODE		(1 << 31) /* SP vs. SR Tx mode */
+
+/* Good transmitted packets counter registers */
+#define E1000_PQGPTC(_n)		(0x010014 + (0x100 * (_n)))
+
+/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */
+#define E1000_I210_TXPBS_SIZE(_n, _s)	((_s) << (6 * _n))
+
+#define E1000_MMDAC			13 /* MMD Access Control */
+#define E1000_MMDAAD			14 /* MMD Access Address/Data */
+
+/* Convenience macros
+ *
+ * Note: "_n" is the queue number of the register to be written to.
+ *
+ * Example usage:
+ * E1000_RDBAL_REG(current_rx_queue)
+ */
+#define E1000_RDBAL(_n)	((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \
+			 (0x0C000 + ((_n) * 0x40)))
+#define E1000_RDBAH(_n)	((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \
+			 (0x0C004 + ((_n) * 0x40)))
+#define E1000_RDLEN(_n)	((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \
+			 (0x0C008 + ((_n) * 0x40)))
+#define E1000_SRRCTL(_n)	((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \
+				 (0x0C00C + ((_n) * 0x40)))
+#define E1000_RDH(_n)	((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \
+			 (0x0C010 + ((_n) * 0x40)))
+#define E1000_RXCTL(_n)	((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \
+			 (0x0C014 + ((_n) * 0x40)))
+#define E1000_DCA_RXCTRL(_n)	E1000_RXCTL(_n)
+#define E1000_RDT(_n)	((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \
+			 (0x0C018 + ((_n) * 0x40)))
+#define E1000_RXDCTL(_n)	((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \
+				 (0x0C028 + ((_n) * 0x40)))
+#define E1000_RQDPC(_n)	((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \
+			 (0x0C030 + ((_n) * 0x40)))
+#define E1000_TDBAL(_n)	((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \
+			 (0x0E000 + ((_n) * 0x40)))
+#define E1000_TDBAH(_n)	((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \
+			 (0x0E004 + ((_n) * 0x40)))
+#define E1000_TDLEN(_n)	((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \
+			 (0x0E008 + ((_n) * 0x40)))
+#define E1000_TDH(_n)	((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \
+			 (0x0E010 + ((_n) * 0x40)))
+#define E1000_TXCTL(_n)	((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \
+			 (0x0E014 + ((_n) * 0x40)))
+#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n)
+#define E1000_TDT(_n)	((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \
+			 (0x0E018 + ((_n) * 0x40)))
+#define E1000_TXDCTL(_n)	((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \
+				 (0x0E028 + ((_n) * 0x40)))
+#define E1000_TDWBAL(_n)	((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \
+				 (0x0E038 + ((_n) * 0x40)))
+#define E1000_TDWBAH(_n)	((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \
+				 (0x0E03C + ((_n) * 0x40)))
+#define E1000_TARC(_n)		(0x03840 + ((_n) * 0x100))
+#define E1000_RSRPD		0x02C00  /* Rx Small Packet Detect - RW */
+#define E1000_RAID		0x02C08  /* Receive Ack Interrupt Delay - RW */
+#define E1000_KABGTXD		0x03004  /* AFE Band Gap Transmit Ref Data */
+#define E1000_PSRTYPE(_i)	(0x05480 + ((_i) * 4))
+#define E1000_RAL(_i)		(((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+				 (0x054E0 + ((_i - 16) * 8)))
+#define E1000_RAH(_i)		(((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+				 (0x054E4 + ((_i - 16) * 8)))
+#define E1000_SHRAL(_i)		(0x05438 + ((_i) * 8))
+#define E1000_SHRAH(_i)		(0x0543C + ((_i) * 8))
+#define E1000_IP4AT_REG(_i)	(0x05840 + ((_i) * 8))
+#define E1000_IP6AT_REG(_i)	(0x05880 + ((_i) * 4))
+#define E1000_WUPM_REG(_i)	(0x05A00 + ((_i) * 4))
+#define E1000_FFMT_REG(_i)	(0x09000 + ((_i) * 8))
+#define E1000_FFVT_REG(_i)	(0x09800 + ((_i) * 8))
+#define E1000_FFLT_REG(_i)	(0x05F00 + ((_i) * 8))
+#define E1000_PBSLAC		0x03100  /* Pkt Buffer Slave Access Control */
+#define E1000_PBSLAD(_n)	(0x03110 + (0x4 * (_n)))  /* Pkt Buffer DWORD */
+#define E1000_TXPBS		0x03404  /* Tx Packet Buffer Size - RW */
+/* Same as TXPBS, renamed for newer Si - RW */
+#define E1000_ITPBS		0x03404
+#define E1000_TDFH		0x03410  /* Tx Data FIFO Head - RW */
+#define E1000_TDFT		0x03418  /* Tx Data FIFO Tail - RW */
+#define E1000_TDFHS		0x03420  /* Tx Data FIFO Head Saved - RW */
+#define E1000_TDFTS		0x03428  /* Tx Data FIFO Tail Saved - RW */
+#define E1000_TDFPC		0x03430  /* Tx Data FIFO Packet Count - RW */
+#define E1000_TDPUMB		0x0357C  /* DMA Tx Desc uC Mail Box - RW */
+#define E1000_TDPUAD		0x03580  /* DMA Tx Desc uC Addr Command - RW */
+#define E1000_TDPUWD		0x03584  /* DMA Tx Desc uC Data Write - RW */
+#define E1000_TDPURD		0x03588  /* DMA Tx Desc uC Data  Read  - RW */
+#define E1000_TDPUCTL		0x0358C  /* DMA Tx Desc uC Control - RW */
+#define E1000_DTXCTL		0x03590  /* DMA Tx Control - RW */
+#define E1000_DTXTCPFLGL	0x0359C /* DMA Tx Control flag low - RW */
+#define E1000_DTXTCPFLGH	0x035A0 /* DMA Tx Control flag high - RW */
+/* DMA Tx Max Total Allow Size Reqs - RW */
+#define E1000_DTXMXSZRQ		0x03540
+#define E1000_TIDV	0x03820  /* Tx Interrupt Delay Value - RW */
+#define E1000_TADV	0x0382C  /* Tx Interrupt Absolute Delay Val - RW */
+#define E1000_CRCERRS	0x04000  /* CRC Error Count - R/clr */
+#define E1000_ALGNERRC	0x04004  /* Alignment Error Count - R/clr */
+#define E1000_SYMERRS	0x04008  /* Symbol Error Count - R/clr */
+#define E1000_RXERRC	0x0400C  /* Receive Error Count - R/clr */
+#define E1000_MPC	0x04010  /* Missed Packet Count - R/clr */
+#define E1000_SCC	0x04014  /* Single Collision Count - R/clr */
+#define E1000_ECOL	0x04018  /* Excessive Collision Count - R/clr */
+#define E1000_MCC	0x0401C  /* Multiple Collision Count - R/clr */
+#define E1000_LATECOL	0x04020  /* Late Collision Count - R/clr */
+#define E1000_COLC	0x04028  /* Collision Count - R/clr */
+#define E1000_DC	0x04030  /* Defer Count - R/clr */
+#define E1000_TNCRS	0x04034  /* Tx-No CRS - R/clr */
+#define E1000_SEC	0x04038  /* Sequence Error Count - R/clr */
+#define E1000_CEXTERR	0x0403C  /* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC	0x04040  /* Receive Length Error Count - R/clr */
+#define E1000_XONRXC	0x04048  /* XON Rx Count - R/clr */
+#define E1000_XONTXC	0x0404C  /* XON Tx Count - R/clr */
+#define E1000_XOFFRXC	0x04050  /* XOFF Rx Count - R/clr */
+#define E1000_XOFFTXC	0x04054  /* XOFF Tx Count - R/clr */
+#define E1000_FCRUC	0x04058  /* Flow Control Rx Unsupported Count- R/clr */
+#define E1000_PRC64	0x0405C  /* Packets Rx (64 bytes) - R/clr */
+#define E1000_PRC127	0x04060  /* Packets Rx (65-127 bytes) - R/clr */
+#define E1000_PRC255	0x04064  /* Packets Rx (128-255 bytes) - R/clr */
+#define E1000_PRC511	0x04068  /* Packets Rx (255-511 bytes) - R/clr */
+#define E1000_PRC1023	0x0406C  /* Packets Rx (512-1023 bytes) - R/clr */
+#define E1000_PRC1522	0x04070  /* Packets Rx (1024-1522 bytes) - R/clr */
+#define E1000_GPRC	0x04074  /* Good Packets Rx Count - R/clr */
+#define E1000_BPRC	0x04078  /* Broadcast Packets Rx Count - R/clr */
+#define E1000_MPRC	0x0407C  /* Multicast Packets Rx Count - R/clr */
+#define E1000_GPTC	0x04080  /* Good Packets Tx Count - R/clr */
+#define E1000_GORCL	0x04088  /* Good Octets Rx Count Low - R/clr */
+#define E1000_GORCH	0x0408C  /* Good Octets Rx Count High - R/clr */
+#define E1000_GOTCL	0x04090  /* Good Octets Tx Count Low - R/clr */
+#define E1000_GOTCH	0x04094  /* Good Octets Tx Count High - R/clr */
+#define E1000_RNBC	0x040A0  /* Rx No Buffers Count - R/clr */
+#define E1000_RUC	0x040A4  /* Rx Undersize Count - R/clr */
+#define E1000_RFC	0x040A8  /* Rx Fragment Count - R/clr */
+#define E1000_ROC	0x040AC  /* Rx Oversize Count - R/clr */
+#define E1000_RJC	0x040B0  /* Rx Jabber Count - R/clr */
+#define E1000_MGTPRC	0x040B4  /* Management Packets Rx Count - R/clr */
+#define E1000_MGTPDC	0x040B8  /* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC	0x040BC  /* Management Packets Tx Count - R/clr */
+#define E1000_TORL	0x040C0  /* Total Octets Rx Low - R/clr */
+#define E1000_TORH	0x040C4  /* Total Octets Rx High - R/clr */
+#define E1000_TOTL	0x040C8  /* Total Octets Tx Low - R/clr */
+#define E1000_TOTH	0x040CC  /* Total Octets Tx High - R/clr */
+#define E1000_TPR	0x040D0  /* Total Packets Rx - R/clr */
+#define E1000_TPT	0x040D4  /* Total Packets Tx - R/clr */
+#define E1000_PTC64	0x040D8  /* Packets Tx (64 bytes) - R/clr */
+#define E1000_PTC127	0x040DC  /* Packets Tx (65-127 bytes) - R/clr */
+#define E1000_PTC255	0x040E0  /* Packets Tx (128-255 bytes) - R/clr */
+#define E1000_PTC511	0x040E4  /* Packets Tx (256-511 bytes) - R/clr */
+#define E1000_PTC1023	0x040E8  /* Packets Tx (512-1023 bytes) - R/clr */
+#define E1000_PTC1522	0x040EC  /* Packets Tx (1024-1522 Bytes) - R/clr */
+#define E1000_MPTC	0x040F0  /* Multicast Packets Tx Count - R/clr */
+#define E1000_BPTC	0x040F4  /* Broadcast Packets Tx Count - R/clr */
+#define E1000_TSCTC	0x040F8  /* TCP Segmentation Context Tx - R/clr */
+#define E1000_TSCTFC	0x040FC  /* TCP Segmentation Context Tx Fail - R/clr */
+#define E1000_IAC	0x04100  /* Interrupt Assertion Count */
+#define E1000_ICRXPTC	0x04104  /* Interrupt Cause Rx Pkt Timer Expire Count */
+#define E1000_ICRXATC	0x04108  /* Interrupt Cause Rx Abs Timer Expire Count */
+#define E1000_ICTXPTC	0x0410C  /* Interrupt Cause Tx Pkt Timer Expire Count */
+#define E1000_ICTXATC	0x04110  /* Interrupt Cause Tx Abs Timer Expire Count */
+#define E1000_ICTXQEC	0x04118  /* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQMTC	0x0411C  /* Interrupt Cause Tx Queue Min Thresh Count */
+#define E1000_ICRXDMTC	0x04120  /* Interrupt Cause Rx Desc Min Thresh Count */
+#define E1000_ICRXOC	0x04124  /* Interrupt Cause Receiver Overrun Count */
+
+/* Virtualization statistical counters */
+#define E1000_PFVFGPRC(_n)	(0x010010 + (0x100 * (_n)))
+#define E1000_PFVFGPTC(_n)	(0x010014 + (0x100 * (_n)))
+#define E1000_PFVFGORC(_n)	(0x010018 + (0x100 * (_n)))
+#define E1000_PFVFGOTC(_n)	(0x010034 + (0x100 * (_n)))
+#define E1000_PFVFMPRC(_n)	(0x010038 + (0x100 * (_n)))
+#define E1000_PFVFGPRLBC(_n)	(0x010040 + (0x100 * (_n)))
+#define E1000_PFVFGPTLBC(_n)	(0x010044 + (0x100 * (_n)))
+#define E1000_PFVFGORLBC(_n)	(0x010048 + (0x100 * (_n)))
+#define E1000_PFVFGOTLBC(_n)	(0x010050 + (0x100 * (_n)))
+
+/* LinkSec */
+#define E1000_LSECTXUT		0x04300  /* Tx Untagged Pkt Cnt */
+#define E1000_LSECTXPKTE	0x04304  /* Encrypted Tx Pkts Cnt */
+#define E1000_LSECTXPKTP	0x04308  /* Protected Tx Pkt Cnt */
+#define E1000_LSECTXOCTE	0x0430C  /* Encrypted Tx Octets Cnt */
+#define E1000_LSECTXOCTP	0x04310  /* Protected Tx Octets Cnt */
+#define E1000_LSECRXUT		0x04314  /* Untagged non-Strict Rx Pkt Cnt */
+#define E1000_LSECRXOCTD	0x0431C  /* Rx Octets Decrypted Count */
+#define E1000_LSECRXOCTV	0x04320  /* Rx Octets Validated */
+#define E1000_LSECRXBAD		0x04324  /* Rx Bad Tag */
+#define E1000_LSECRXNOSCI	0x04328  /* Rx Packet No SCI Count */
+#define E1000_LSECRXUNSCI	0x0432C  /* Rx Packet Unknown SCI Count */
+#define E1000_LSECRXUNCH	0x04330  /* Rx Unchecked Packets Count */
+#define E1000_LSECRXDELAY	0x04340  /* Rx Delayed Packet Count */
+#define E1000_LSECRXLATE	0x04350  /* Rx Late Packets Count */
+#define E1000_LSECRXOK(_n)	(0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */
+#define E1000_LSECRXINV(_n)	(0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */
+#define E1000_LSECRXNV(_n)	(0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */
+#define E1000_LSECRXUNSA	0x043C0  /* Rx Unused SA Count */
+#define E1000_LSECRXNUSA	0x043D0  /* Rx Not Using SA Count */
+#define E1000_LSECTXCAP		0x0B000  /* Tx Capabilities Register - RO */
+#define E1000_LSECRXCAP		0x0B300  /* Rx Capabilities Register - RO */
+#define E1000_LSECTXCTRL	0x0B004  /* Tx Control - RW */
+#define E1000_LSECRXCTRL	0x0B304  /* Rx Control - RW */
+#define E1000_LSECTXSCL		0x0B008  /* Tx SCI Low - RW */
+#define E1000_LSECTXSCH		0x0B00C  /* Tx SCI High - RW */
+#define E1000_LSECTXSA		0x0B010  /* Tx SA0 - RW */
+#define E1000_LSECTXPN0		0x0B018  /* Tx SA PN 0 - RW */
+#define E1000_LSECTXPN1		0x0B01C  /* Tx SA PN 1 - RW */
+#define E1000_LSECRXSCL		0x0B3D0  /* Rx SCI Low - RW */
+#define E1000_LSECRXSCH		0x0B3E0  /* Rx SCI High - RW */
+/* LinkSec Tx 128-bit Key 0 - WO */
+#define E1000_LSECTXKEY0(_n)	(0x0B020 + (0x04 * (_n)))
+/* LinkSec Tx 128-bit Key 1 - WO */
+#define E1000_LSECTXKEY1(_n)	(0x0B030 + (0x04 * (_n)))
+#define E1000_LSECRXSA(_n)	(0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */
+#define E1000_LSECRXPN(_n)	(0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */
+/* LinkSec Rx Keys  - where _n is the SA no. and _m the 4 dwords of the 128 bit
+ * key - RW.
+ */
+#define E1000_LSECRXKEY(_n, _m)	(0x0B350 + (0x10 * (_n)) + (0x04 * (_m)))
+
+#define E1000_SSVPC		0x041A0 /* Switch Security Violation Pkt Cnt */
+#define E1000_IPSCTRL		0xB430  /* IpSec Control Register */
+#define E1000_IPSRXCMD		0x0B408 /* IPSec Rx Command Register - RW */
+#define E1000_IPSRXIDX		0x0B400 /* IPSec Rx Index - RW */
+/* IPSec Rx IPv4/v6 Address - RW */
+#define E1000_IPSRXIPADDR(_n)	(0x0B420 + (0x04 * (_n)))
+/* IPSec Rx 128-bit Key - RW */
+#define E1000_IPSRXKEY(_n)	(0x0B410 + (0x04 * (_n)))
+#define E1000_IPSRXSALT		0x0B404  /* IPSec Rx Salt - RW */
+#define E1000_IPSRXSPI		0x0B40C  /* IPSec Rx SPI - RW */
+/* IPSec Tx 128-bit Key - RW */
+#define E1000_IPSTXKEY(_n)	(0x0B460 + (0x04 * (_n)))
+#define E1000_IPSTXSALT		0x0B454  /* IPSec Tx Salt - RW */
+#define E1000_IPSTXIDX		0x0B450  /* IPSec Tx SA IDX - RW */
+#define E1000_PCS_CFG0	0x04200  /* PCS Configuration 0 - RW */
+#define E1000_PCS_LCTL	0x04208  /* PCS Link Control - RW */
+#define E1000_PCS_LSTAT	0x0420C  /* PCS Link Status - RO */
+#define E1000_CBTMPC	0x0402C  /* Circuit Breaker Tx Packet Count */
+#define E1000_HTDPMC	0x0403C  /* Host Transmit Discarded Packets */
+#define E1000_CBRDPC	0x04044  /* Circuit Breaker Rx Dropped Count */
+#define E1000_CBRMPC	0x040FC  /* Circuit Breaker Rx Packet Count */
+#define E1000_RPTHC	0x04104  /* Rx Packets To Host */
+#define E1000_HGPTC	0x04118  /* Host Good Packets Tx Count */
+#define E1000_HTCBDPC	0x04124  /* Host Tx Circuit Breaker Dropped Count */
+#define E1000_HGORCL	0x04128  /* Host Good Octets Received Count Low */
+#define E1000_HGORCH	0x0412C  /* Host Good Octets Received Count High */
+#define E1000_HGOTCL	0x04130  /* Host Good Octets Transmit Count Low */
+#define E1000_HGOTCH	0x04134  /* Host Good Octets Transmit Count High */
+#define E1000_LENERRS	0x04138  /* Length Errors Count */
+#define E1000_SCVPC	0x04228  /* SerDes/SGMII Code Violation Pkt Count */
+#define E1000_HRMPC	0x0A018  /* Header Redirection Missed Packet Count */
+#define E1000_PCS_ANADV	0x04218  /* AN advertisement - RW */
+#define E1000_PCS_LPAB	0x0421C  /* Link Partner Ability - RW */
+#define E1000_PCS_NPTX	0x04220  /* AN Next Page Transmit - RW */
+#define E1000_PCS_LPABNP	0x04224 /* Link Partner Ability Next Pg - RW */
+#define E1000_RXCSUM	0x05000  /* Rx Checksum Control - RW */
+#define E1000_RLPML	0x05004  /* Rx Long Packet Max Length */
+#define E1000_RFCTL	0x05008  /* Receive Filter Control*/
+#define E1000_MTA	0x05200  /* Multicast Table Array - RW Array */
+#define E1000_RA	0x05400  /* Receive Address - RW Array */
+#define E1000_RA2	0x054E0  /* 2nd half of Rx address array - RW Array */
+#define E1000_VFTA	0x05600  /* VLAN Filter Table Array - RW Array */
+#define E1000_VT_CTL	0x0581C  /* VMDq Control - RW */
+#define E1000_CIAA	0x05B88  /* Config Indirect Access Address - RW */
+#define E1000_CIAD	0x05B8C  /* Config Indirect Access Data - RW */
+#define E1000_VFQA0	0x0B000  /* VLAN Filter Queue Array 0 - RW Array */
+#define E1000_VFQA1	0x0B200  /* VLAN Filter Queue Array 1 - RW Array */
+#define E1000_WUC	0x05800  /* Wakeup Control - RW */
+#define E1000_WUFC	0x05808  /* Wakeup Filter Control - RW */
+#define E1000_WUS	0x05810  /* Wakeup Status - RO */
+#define E1000_MANC	0x05820  /* Management Control - RW */
+#define E1000_IPAV	0x05838  /* IP Address Valid - RW */
+#define E1000_IP4AT	0x05840  /* IPv4 Address Table - RW Array */
+#define E1000_IP6AT	0x05880  /* IPv6 Address Table - RW Array */
+#define E1000_WUPL	0x05900  /* Wakeup Packet Length - RW */
+#define E1000_WUPM	0x05A00  /* Wakeup Packet Memory - RO A */
+#define E1000_PBACL	0x05B68  /* MSIx PBA Clear - Read/Write 1's to clear */
+#define E1000_FFLT	0x05F00  /* Flexible Filter Length Table - RW Array */
+#define E1000_HOST_IF	0x08800  /* Host Interface */
+#define E1000_FFMT	0x09000  /* Flexible Filter Mask Table - RW Array */
+#define E1000_FFVT	0x09800  /* Flexible Filter Value Table - RW Array */
+#define E1000_HIBBA	0x8F40   /* Host Interface Buffer Base Address */
+/* Flexible Host Filter Table */
+#define E1000_FHFT(_n)	(0x09000 + ((_n) * 0x100))
+/* Ext Flexible Host Filter Table */
+#define E1000_FHFT_EXT(_n)	(0x09A00 + ((_n) * 0x100))
+
+
+#define E1000_KMRNCTRLSTA	0x00034 /* MAC-PHY interface - RW */
+#define E1000_MANC2H		0x05860 /* Management Control To Host - RW */
+/* Management Decision Filters */
+#define E1000_MDEF(_n)		(0x05890 + (4 * (_n)))
+#define E1000_SW_FW_SYNC	0x05B5C /* SW-FW Synchronization - RW */
+#define E1000_CCMCTL	0x05B48 /* CCM Control Register */
+#define E1000_GIOCTL	0x05B44 /* GIO Analog Control Register */
+#define E1000_SCCTL	0x05B4C /* PCIc PLL Configuration Register */
+#define E1000_GCR	0x05B00 /* PCI-Ex Control */
+#define E1000_GCR2	0x05B64 /* PCI-Ex Control #2 */
+#define E1000_GSCL_1	0x05B10 /* PCI-Ex Statistic Control #1 */
+#define E1000_GSCL_2	0x05B14 /* PCI-Ex Statistic Control #2 */
+#define E1000_GSCL_3	0x05B18 /* PCI-Ex Statistic Control #3 */
+#define E1000_GSCL_4	0x05B1C /* PCI-Ex Statistic Control #4 */
+#define E1000_FACTPS	0x05B30 /* Function Active and Power State to MNG */
+#define E1000_SWSM	0x05B50 /* SW Semaphore */
+#define E1000_FWSM	0x05B54 /* FW Semaphore */
+/* Driver-only SW semaphore (not used by BOOT agents) */
+#define E1000_SWSM2	0x05B58
+#define E1000_DCA_ID	0x05B70 /* DCA Requester ID Information - RO */
+#define E1000_DCA_CTRL	0x05B74 /* DCA Control - RW */
+#define E1000_UFUSE	0x05B78 /* UFUSE - RO */
+#define E1000_FFLT_DBG	0x05F04 /* Debug Register */
+#define E1000_HICR	0x08F00 /* Host Interface Control */
+#define E1000_FWSTS	0x08F0C /* FW Status */
+
+/* RSS registers */
+#define E1000_CPUVEC	0x02C10 /* CPU Vector Register - RW */
+#define E1000_MRQC	0x05818 /* Multiple Receive Control - RW */
+#define E1000_IMIR(_i)	(0x05A80 + ((_i) * 4))  /* Immediate Interrupt */
+#define E1000_IMIREXT(_i)	(0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/
+#define E1000_IMIRVP		0x05AC0 /* Immediate INT Rx VLAN Priority -RW */
+#define E1000_MSIXBM(_i)	(0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */
+#define E1000_RETA(_i)	(0x05C00 + ((_i) * 4)) /* Redirection Table - RW */
+#define E1000_RSSRK(_i)	(0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */
+#define E1000_RSSIM	0x05864 /* RSS Interrupt Mask */
+#define E1000_RSSIR	0x05868 /* RSS Interrupt Request */
+/* VT Registers */
+#define E1000_SWPBS	0x03004 /* Switch Packet Buffer Size - RW */
+#define E1000_MBVFICR	0x00C80 /* Mailbox VF Cause - RWC */
+#define E1000_MBVFIMR	0x00C84 /* Mailbox VF int Mask - RW */
+#define E1000_VFLRE	0x00C88 /* VF Register Events - RWC */
+#define E1000_VFRE	0x00C8C /* VF Receive Enables */
+#define E1000_VFTE	0x00C90 /* VF Transmit Enables */
+#define E1000_QDE	0x02408 /* Queue Drop Enable - RW */
+#define E1000_DTXSWC	0x03500 /* DMA Tx Switch Control - RW */
+#define E1000_WVBR	0x03554 /* VM Wrong Behavior - RWS */
+#define E1000_RPLOLR	0x05AF0 /* Replication Offload - RW */
+#define E1000_UTA	0x0A000 /* Unicast Table Array - RW */
+#define E1000_IOVTCL	0x05BBC /* IOV Control Register */
+#define E1000_VMRCTL	0X05D80 /* Virtual Mirror Rule Control */
+#define E1000_VMRVLAN	0x05D90 /* Virtual Mirror Rule VLAN */
+#define E1000_VMRVM	0x05DA0 /* Virtual Mirror Rule VM */
+#define E1000_MDFB	0x03558 /* Malicious Driver free block */
+#define E1000_LVMMC	0x03548 /* Last VM Misbehavior cause */
+#define E1000_TXSWC	0x05ACC /* Tx Switch Control */
+#define E1000_SCCRL	0x05DB0 /* Storm Control Control */
+#define E1000_BSCTRH	0x05DB8 /* Broadcast Storm Control Threshold */
+#define E1000_MSCTRH	0x05DBC /* Multicast Storm Control Threshold */
+/* These act per VF so an array friendly macro is used */
+#define E1000_V2PMAILBOX(_n)	(0x00C40 + (4 * (_n)))
+#define E1000_P2VMAILBOX(_n)	(0x00C00 + (4 * (_n)))
+#define E1000_VMBMEM(_n)	(0x00800 + (64 * (_n)))
+#define E1000_VFVMBMEM(_n)	(0x00800 + (_n))
+#define E1000_VMOLR(_n)		(0x05AD0 + (4 * (_n)))
+/* VLAN Virtual Machine Filter - RW */
+#define E1000_VLVF(_n)		(0x05D00 + (4 * (_n)))
+#define E1000_VMVIR(_n)		(0x03700 + (4 * (_n)))
+#define E1000_DVMOLR(_n)	(0x0C038 + (0x40 * (_n))) /* DMA VM offload */
+#define E1000_VTCTRL(_n)	(0x10000 + (0x100 * (_n))) /* VT Control */
+#define E1000_TSYNCRXCTL	0x0B620 /* Rx Time Sync Control register - RW */
+#define E1000_TSYNCTXCTL	0x0B614 /* Tx Time Sync Control register - RW */
+#define E1000_TSYNCRXCFG	0x05F50 /* Time Sync Rx Configuration - RW */
+#define E1000_RXSTMPL	0x0B624 /* Rx timestamp Low - RO */
+#define E1000_RXSTMPH	0x0B628 /* Rx timestamp High - RO */
+#define E1000_RXSATRL	0x0B62C /* Rx timestamp attribute low - RO */
+#define E1000_RXSATRH	0x0B630 /* Rx timestamp attribute high - RO */
+#define E1000_TXSTMPL	0x0B618 /* Tx timestamp value Low - RO */
+#define E1000_TXSTMPH	0x0B61C /* Tx timestamp value High - RO */
+#define E1000_SYSTIML	0x0B600 /* System time register Low - RO */
+#define E1000_SYSTIMH	0x0B604 /* System time register High - RO */
+#define E1000_TIMINCA	0x0B608 /* Increment attributes register - RW */
+#define E1000_TIMADJL	0x0B60C /* Time sync time adjustment offset Low - RW */
+#define E1000_TIMADJH	0x0B610 /* Time sync time adjustment offset High - RW */
+#define E1000_TSAUXC	0x0B640 /* Timesync Auxiliary Control register */
+#define E1000_SYSTIMR	0x0B6F8 /* System time register Residue */
+#define E1000_TSICR	0x0B66C /* Interrupt Cause Register */
+#define E1000_TSIM	0x0B674 /* Interrupt Mask Register */
+
+/* Filtering Registers */
+#define E1000_SAQF(_n)	(0x05980 + (4 * (_n))) /* Source Address Queue Fltr */
+#define E1000_DAQF(_n)	(0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */
+#define E1000_SPQF(_n)	(0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */
+#define E1000_FTQF(_n)	(0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */
+#define E1000_TTQF(_n)	(0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */
+#define E1000_SYNQF(_n)	(0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */
+#define E1000_ETQF(_n)	(0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+
+#define E1000_RTTDCS	0x3600 /* Reedtown Tx Desc plane control and status */
+#define E1000_RTTPCS	0x3474 /* Reedtown Tx Packet Plane control and status */
+#define E1000_RTRPCS	0x2474 /* Rx packet plane control and status */
+#define E1000_RTRUP2TC	0x05AC4 /* Rx User Priority to Traffic Class */
+#define E1000_RTTUP2TC	0x0418 /* Transmit User Priority to Traffic Class */
+/* Tx Desc plane TC Rate-scheduler config */
+#define E1000_RTTDTCRC(_n)	(0x3610 + ((_n) * 4))
+/* Tx Packet plane TC Rate-Scheduler Config */
+#define E1000_RTTPTCRC(_n)	(0x3480 + ((_n) * 4))
+/* Rx Packet plane TC Rate-Scheduler Config */
+#define E1000_RTRPTCRC(_n)	(0x2480 + ((_n) * 4))
+/* Tx Desc Plane TC Rate-Scheduler Status */
+#define E1000_RTTDTCRS(_n)	(0x3630 + ((_n) * 4))
+/* Tx Desc Plane TC Rate-Scheduler MMW */
+#define E1000_RTTDTCRM(_n)	(0x3650 + ((_n) * 4))
+/* Tx Packet plane TC Rate-Scheduler Status */
+#define E1000_RTTPTCRS(_n)	(0x34A0 + ((_n) * 4))
+/* Tx Packet plane TC Rate-scheduler MMW */
+#define E1000_RTTPTCRM(_n)	(0x34C0 + ((_n) * 4))
+/* Rx Packet plane TC Rate-Scheduler Status */
+#define E1000_RTRPTCRS(_n)	(0x24A0 + ((_n) * 4))
+/* Rx Packet plane TC Rate-Scheduler MMW */
+#define E1000_RTRPTCRM(_n)	(0x24C0 + ((_n) * 4))
+/* Tx Desc plane VM Rate-Scheduler MMW*/
+#define E1000_RTTDVMRM(_n)	(0x3670 + ((_n) * 4))
+/* Tx BCN Rate-Scheduler MMW */
+#define E1000_RTTBCNRM(_n)	(0x3690 + ((_n) * 4))
+#define E1000_RTTDQSEL	0x3604  /* Tx Desc Plane Queue Select */
+#define E1000_RTTDVMRC	0x3608  /* Tx Desc Plane VM Rate-Scheduler Config */
+#define E1000_RTTDVMRS	0x360C  /* Tx Desc Plane VM Rate-Scheduler Status */
+#define E1000_RTTBCNRC	0x36B0  /* Tx BCN Rate-Scheduler Config */
+#define E1000_RTTBCNRS	0x36B4  /* Tx BCN Rate-Scheduler Status */
+#define E1000_RTTBCNCR	0xB200  /* Tx BCN Control Register */
+#define E1000_RTTBCNTG	0x35A4  /* Tx BCN Tagging */
+#define E1000_RTTBCNCP	0xB208  /* Tx BCN Congestion point */
+#define E1000_RTRBCNCR	0xB20C  /* Rx BCN Control Register */
+#define E1000_RTTBCNRD	0x36B8  /* Tx BCN Rate Drift */
+#define E1000_PFCTOP	0x1080  /* Priority Flow Control Type and Opcode */
+#define E1000_RTTBCNIDX	0xB204  /* Tx BCN Congestion Point */
+#define E1000_RTTBCNACH	0x0B214 /* Tx BCN Control High */
+#define E1000_RTTBCNACL	0x0B210 /* Tx BCN Control Low */
+
+/* DMA Coalescing registers */
+#define E1000_DMACR	0x02508 /* Control Register */
+#define E1000_DMCTXTH	0x03550 /* Transmit Threshold */
+#define E1000_DMCTLX	0x02514 /* Time to Lx Request */
+#define E1000_DMCRTRH	0x05DD0 /* Receive Packet Rate Threshold */
+#define E1000_DMCCNT	0x05DD4 /* Current Rx Count */
+#define E1000_FCRTC	0x02170 /* Flow Control Rx high watermark */
+#define E1000_PCIEMISC	0x05BB8 /* PCIE misc config register */
+
+/* PCIe Parity Status Register */
+#define E1000_PCIEERRSTS	0x05BA8
+
+#define E1000_PROXYS	0x5F64 /* Proxying Status */
+#define E1000_PROXYFC	0x5F60 /* Proxying Filter Control */
+/* Thermal sensor configuration and status registers */
+#define E1000_THMJT	0x08100 /* Junction Temperature */
+#define E1000_THLOWTC	0x08104 /* Low Threshold Control */
+#define E1000_THMIDTC	0x08108 /* Mid Threshold Control */
+#define E1000_THHIGHTC	0x0810C /* High Threshold Control */
+#define E1000_THSTAT	0x08110 /* Thermal Sensor Status */
+
+/* Energy Efficient Ethernet "EEE" registers */
+#define E1000_IPCNFG	0x0E38 /* Internal PHY Configuration */
+#define E1000_LTRC	0x01A0 /* Latency Tolerance Reporting Control */
+#define E1000_EEER	0x0E30 /* Energy Efficient Ethernet "EEE"*/
+#define E1000_EEE_SU	0x0E34 /* EEE Setup */
+#define E1000_TLPIC	0x4148 /* EEE Tx LPI Count - TLPIC */
+#define E1000_RLPIC	0x414C /* EEE Rx LPI Count - RLPIC */
+
+/* OS2BMC Registers */
+#define E1000_B2OSPC	0x08FE0 /* BMC2OS packets sent by BMC */
+#define E1000_B2OGPRC	0x04158 /* BMC2OS packets received by host */
+#define E1000_O2BGPTC	0x08FE4 /* OS2BMC packets received by BMC */
+#define E1000_O2BSPC	0x0415C /* OS2BMC packets transmitted by host */
+
+
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
new file mode 100644
index 00000000..e5554ca3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
@@ -0,0 +1,859 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _IGB_H_
+#define _IGB_H_
+
+#include <linux/kobject.h>
+
+#ifndef IGB_NO_LRO
+#include <net/tcp.h>
+#endif
+
+#undef HAVE_HW_TIME_STAMP
+#ifdef HAVE_HW_TIME_STAMP
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#endif
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+
+struct igb_adapter;
+
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+//#define IGB_DCA
+#endif
+#ifdef IGB_DCA
+#include <linux/dca.h>
+#endif
+
+#include "kcompat.h"
+
+#ifdef HAVE_SCTP
+#include <linux/sctp.h>
+#endif
+
+#include "e1000_api.h"
+#include "e1000_82575.h"
+#include "e1000_manage.h"
+#include "e1000_mbx.h"
+
+#define IGB_ERR(args...) printk(KERN_ERR "igb: " args)
+
+#define PFX "igb: "
+#define DPRINTK(nlevel, klevel, fmt, args...) \
+	(void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
+	printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
+		__FUNCTION__ , ## args))
+
+#ifdef HAVE_PTP_1588_CLOCK
+#include <linux/clocksource.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#ifdef HAVE_I2C_SUPPORT
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#endif /* HAVE_I2C_SUPPORT */
+
+/* Interrupt defines */
+#define IGB_START_ITR                    648 /* ~6000 ints/sec */
+#define IGB_4K_ITR                       980
+#define IGB_20K_ITR                      196
+#define IGB_70K_ITR                       56
+
+/* Interrupt modes, as used by the IntMode parameter */
+#define IGB_INT_MODE_LEGACY                0
+#define IGB_INT_MODE_MSI                   1
+#define IGB_INT_MODE_MSIX                  2
+
+/* TX/RX descriptor defines */
+#define IGB_DEFAULT_TXD                  256
+#define IGB_DEFAULT_TX_WORK		 128
+#define IGB_MIN_TXD                       80
+#define IGB_MAX_TXD                     4096
+
+#define IGB_DEFAULT_RXD                  256
+#define IGB_MIN_RXD                       80
+#define IGB_MAX_RXD                     4096
+
+#define IGB_MIN_ITR_USECS                 10 /* 100k irq/sec */
+#define IGB_MAX_ITR_USECS               8191 /* 120  irq/sec */
+
+#define NON_Q_VECTORS                      1
+#define MAX_Q_VECTORS                     10
+
+/* Transmit and receive queues */
+#define IGB_MAX_RX_QUEUES                 16
+#define IGB_MAX_TX_QUEUES                 16
+
+#define IGB_MAX_VF_MC_ENTRIES             30
+#define IGB_MAX_VF_FUNCTIONS               8
+#define IGB_82576_VF_DEV_ID           0x10CA
+#define IGB_I350_VF_DEV_ID            0x1520
+#define IGB_MAX_UTA_ENTRIES              128
+#define MAX_EMULATION_MAC_ADDRS           16
+#define OUI_LEN                            3
+#define IGB_MAX_VMDQ_QUEUES                8
+
+
+struct vf_data_storage {
+	unsigned char vf_mac_addresses[ETH_ALEN];
+	u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES];
+	u16 num_vf_mc_hashes;
+	u16 default_vf_vlan_id;
+	u16 vlans_enabled;
+	unsigned char em_mac_addresses[MAX_EMULATION_MAC_ADDRS * ETH_ALEN];
+	u32 uta_table_copy[IGB_MAX_UTA_ENTRIES];
+	u32 flags;
+	unsigned long last_nack;
+#ifdef IFLA_VF_MAX
+	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+	u16 pf_qos;
+	u16 tx_rate;
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+	bool spoofchk_enabled;
+#endif
+#endif
+};
+
+#define IGB_VF_FLAG_CTS            0x00000001 /* VF is clear to send data */
+#define IGB_VF_FLAG_UNI_PROMISC    0x00000002 /* VF has unicast promisc */
+#define IGB_VF_FLAG_MULTI_PROMISC  0x00000004 /* VF has multicast promisc */
+#define IGB_VF_FLAG_PF_SET_MAC     0x00000008 /* PF has set MAC address */
+
+/* RX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ *           descriptors available in its onboard memory.
+ *           Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ *           available in host memory.
+ *           If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ *           descriptors until either it has this many to write back, or the
+ *           ITR timer expires.
+ */
+#define IGB_RX_PTHRESH	((hw->mac.type == e1000_i354) ? 12 : 8)
+#define IGB_RX_HTHRESH	8
+#define IGB_TX_PTHRESH	((hw->mac.type == e1000_i354) ? 20 : 8)
+#define IGB_TX_HTHRESH	1
+#define IGB_RX_WTHRESH	((hw->mac.type == e1000_82576 && \
+			  adapter->msix_entries) ? 1 : 4)
+
+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
+
+/* NOTE: netdev_alloc_skb reserves 16 bytes, NET_IP_ALIGN means we
+ * reserve 2 more, and skb_shared_info adds an additional 384 more,
+ * this adds roughly 448 bytes of extra data meaning the smallest
+ * allocation we could have is 1K.
+ * i.e. RXBUFFER_512 --> size-1024 slab
+ */
+/* Supported Rx Buffer Sizes */
+#define IGB_RXBUFFER_256   256
+#define IGB_RXBUFFER_2048  2048
+#define IGB_RXBUFFER_16384 16384
+#define IGB_RX_HDR_LEN	   IGB_RXBUFFER_256
+#if MAX_SKB_FRAGS < 8
+#define IGB_RX_BUFSZ	   ALIGN(MAX_JUMBO_FRAME_SIZE / MAX_SKB_FRAGS, 1024)
+#else
+#define IGB_RX_BUFSZ	   IGB_RXBUFFER_2048
+#endif
+
+
+/* Packet Buffer allocations */
+#define IGB_PBA_BYTES_SHIFT 0xA
+#define IGB_TX_HEAD_ADDR_SHIFT 7
+#define IGB_PBA_TX_MASK 0xFFFF0000
+
+#define IGB_FC_PAUSE_TIME 0x0680 /* 858 usec */
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IGB_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+
+#define IGB_EEPROM_APME         0x0400
+#define AUTO_ALL_MODES          0
+
+#ifndef IGB_MASTER_SLAVE
+/* Switch to override PHY master/slave setting */
+#define IGB_MASTER_SLAVE	e1000_ms_hw_default
+#endif
+
+#define IGB_MNG_VLAN_NONE -1
+
+#ifndef IGB_NO_LRO
+#define IGB_LRO_MAX 32 /*Maximum number of LRO descriptors*/
+struct igb_lro_stats {
+	u32 flushed;
+	u32 coal;
+};
+
+/*
+ * igb_lro_header - header format to be aggregated by LRO
+ * @iph: IP header without options
+ * @tcp: TCP header
+ * @ts:  Optional TCP timestamp data in TCP options
+ *
+ * This structure relies on the check above that verifies that the header
+ * is IPv4 and does not contain any options.
+ */
+struct igb_lrohdr {
+	struct iphdr iph;
+	struct tcphdr th;
+	__be32 ts[0];
+};
+
+struct igb_lro_list {
+	struct sk_buff_head active;
+	struct igb_lro_stats stats;
+};
+
+#endif /* IGB_NO_LRO */
+struct igb_cb {
+#ifndef IGB_NO_LRO
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	union {				/* Union defining head/tail partner */
+		struct sk_buff *head;
+		struct sk_buff *tail;
+	};
+#endif
+	__be32	tsecr;			/* timestamp echo response */
+	u32	tsval;			/* timestamp value in host order */
+	u32	next_seq;		/* next expected sequence number */
+	u16	free;			/* 65521 minus total size */
+	u16	mss;			/* size of data portion of packet */
+	u16	append_cnt;		/* number of skb's appended */
+#endif /* IGB_NO_LRO */
+#ifdef HAVE_VLAN_RX_REGISTER
+	u16	vid;			/* VLAN tag */
+#endif
+};
+#define IGB_CB(skb) ((struct igb_cb *)(skb)->cb)
+
+enum igb_tx_flags {
+	/* cmd_type flags */
+	IGB_TX_FLAGS_VLAN	= 0x01,
+	IGB_TX_FLAGS_TSO	= 0x02,
+	IGB_TX_FLAGS_TSTAMP	= 0x04,
+
+	/* olinfo flags */
+	IGB_TX_FLAGS_IPV4	= 0x10,
+	IGB_TX_FLAGS_CSUM	= 0x20,
+};
+
+/* VLAN info */
+#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
+#define IGB_TX_FLAGS_VLAN_SHIFT		        16
+
+/*
+ * The largest size we can write to the descriptor is 65535.  In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGB_MAX_TXD_PWR		15
+#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD)
+#ifndef MAX_SKB_FRAGS
+#define DESC_NEEDED	4
+#elif (MAX_SKB_FRAGS < 16)
+#define DESC_NEEDED	((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#else
+#define DESC_NEEDED	(MAX_SKB_FRAGS + 4)
+#endif
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct igb_tx_buffer {
+	union e1000_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	struct sk_buff *skb;
+	unsigned int bytecount;
+	u16 gso_segs;
+	__be16 protocol;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct igb_rx_buffer {
+	dma_addr_t dma;
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	struct sk_buff *skb;
+#else
+	struct page *page;
+	u32 page_offset;
+#endif
+};
+
+struct igb_tx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 restart_queue;
+};
+
+struct igb_rx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 drops;
+	u64 csum_err;
+	u64 alloc_failed;
+	u64 ipv4_packets;      /* IPv4 headers processed */
+	u64 ipv4e_packets;     /* IPv4E headers with extensions processed */
+	u64 ipv6_packets;      /* IPv6 headers processed */
+	u64 ipv6e_packets;     /* IPv6E headers with extensions processed */
+	u64 tcp_packets;       /* TCP headers processed */
+	u64 udp_packets;       /* UDP headers processed */
+	u64 sctp_packets;      /* SCTP headers processed */
+	u64 nfs_packets;       /* NFS headers processe */
+};
+
+struct igb_ring_container {
+	struct igb_ring *ring;		/* pointer to linked list of rings */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 work_limit;			/* total work allowed per interrupt */
+	u8 count;			/* total number of rings in vector */
+	u8 itr;				/* current ITR setting for ring */
+};
+
+struct igb_ring {
+	struct igb_q_vector *q_vector;  /* backlink to q_vector */
+	struct net_device *netdev;      /* back pointer to net_device */
+	struct device *dev;             /* device for dma mapping */
+	union {				/* array of buffer info structs */
+		struct igb_tx_buffer *tx_buffer_info;
+		struct igb_rx_buffer *rx_buffer_info;
+	};
+#ifdef HAVE_PTP_1588_CLOCK
+	unsigned long last_rx_timestamp;
+#endif /* HAVE_PTP_1588_CLOCK */
+	void *desc;                     /* descriptor ring memory */
+	unsigned long flags;            /* ring specific flags */
+	void __iomem *tail;             /* pointer to ring tail register */
+	dma_addr_t dma;			/* phys address of the ring */
+	unsigned int size;		/* length of desc. ring in bytes */
+
+	u16 count;                      /* number of desc. in the ring */
+	u8 queue_index;                 /* logical index of the ring*/
+	u8 reg_idx;                     /* physical index of the ring */
+
+	/* everything past this point are written often */
+	u16 next_to_clean;
+	u16 next_to_use;
+	u16 next_to_alloc;
+
+	union {
+		/* TX */
+		struct {
+			struct igb_tx_queue_stats tx_stats;
+		};
+		/* RX */
+		struct {
+			struct igb_rx_queue_stats rx_stats;
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+			u16 rx_buffer_len;
+#else
+			struct sk_buff *skb;
+#endif
+		};
+	};
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	struct net_device *vmdq_netdev;
+	int vqueue_index;		/* queue index for virtual netdev */
+#endif
+} ____cacheline_internodealigned_in_smp;
+
+struct igb_q_vector {
+	struct igb_adapter *adapter;	/* backlink */
+	int cpu;			/* CPU for DCA */
+	u32 eims_value;			/* EIMS mask value */
+
+	u16 itr_val;
+	u8 set_itr;
+	void __iomem *itr_register;
+
+	struct igb_ring_container rx, tx;
+
+	struct napi_struct napi;
+#ifndef IGB_NO_LRO
+	struct igb_lro_list lrolist;   /* LRO list for queue vector*/
+#endif
+	char name[IFNAMSIZ + 9];
+#ifndef HAVE_NETDEV_NAPI_LIST
+	struct net_device poll_dev;
+#endif
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct igb_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+enum e1000_ring_flags_t {
+#ifndef HAVE_NDO_SET_FEATURES
+	IGB_RING_FLAG_RX_CSUM,
+#endif
+	IGB_RING_FLAG_RX_SCTP_CSUM,
+	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
+	IGB_RING_FLAG_TX_CTX_IDX,
+	IGB_RING_FLAG_TX_DETECT_HANG,
+};
+
+struct igb_mac_addr {
+	u8 addr[ETH_ALEN];
+	u16 queue;
+	u16 state; /* bitmask */
+};
+#define IGB_MAC_STATE_DEFAULT	0x1
+#define IGB_MAC_STATE_MODIFIED	0x2
+#define IGB_MAC_STATE_IN_USE	0x4
+
+#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
+
+#define IGB_RX_DESC(R, i)	    \
+	(&(((union e1000_adv_rx_desc *)((R)->desc))[i]))
+#define IGB_TX_DESC(R, i)	    \
+	(&(((union e1000_adv_tx_desc *)((R)->desc))[i]))
+#define IGB_TX_CTXTDESC(R, i)	    \
+	(&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i]))
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+#define netdev_ring(ring) \
+	((ring->vmdq_netdev ? ring->vmdq_netdev : ring->netdev))
+#define ring_queue_index(ring) \
+	((ring->vmdq_netdev ? ring->vqueue_index : ring->queue_index))
+#else
+#define netdev_ring(ring) (ring->netdev)
+#define ring_queue_index(ring) (ring->queue_index)
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+
+/* igb_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc,
+				      const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* igb_desc_unused - calculate if we have unused descriptors */
+static inline u16 igb_desc_unused(const struct igb_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#ifdef CONFIG_BQL
+static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
+{
+	return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+#endif /* CONFIG_BQL */
+
+// #ifdef EXT_THERMAL_SENSOR_SUPPORT
+// #ifdef IGB_PROCFS
+struct igb_therm_proc_data
+{
+	struct e1000_hw *hw;
+	struct e1000_thermal_diode_data *sensor_data;
+};
+
+//  #endif /* IGB_PROCFS */
+// #endif /* EXT_THERMAL_SENSOR_SUPPORT */
+
+#ifdef IGB_HWMON
+#define IGB_HWMON_TYPE_LOC	0
+#define IGB_HWMON_TYPE_TEMP	1
+#define IGB_HWMON_TYPE_CAUTION	2
+#define IGB_HWMON_TYPE_MAX	3
+
+struct hwmon_attr {
+	struct device_attribute dev_attr;
+	struct e1000_hw *hw;
+	struct e1000_thermal_diode_data *sensor;
+	char name[12];
+	};
+
+struct hwmon_buff {
+	struct device *device;
+	struct hwmon_attr *hwmon_list;
+	unsigned int n_hwmon;
+	};
+#endif /* IGB_HWMON */
+
+/* board specific private data structure */
+struct igb_adapter {
+#ifdef HAVE_VLAN_RX_REGISTER
+	/* vlgrp must be first member of structure */
+	struct vlan_group *vlgrp;
+#else
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+#endif
+	struct net_device *netdev;
+
+	unsigned long state;
+	unsigned int flags;
+
+	unsigned int num_q_vectors;
+	struct msix_entry *msix_entries;
+
+
+	/* TX */
+	u16 tx_work_limit;
+	u32 tx_timeout_count;
+	int num_tx_queues;
+	struct igb_ring *tx_ring[IGB_MAX_TX_QUEUES];
+
+	/* RX */
+	int num_rx_queues;
+	struct igb_ring *rx_ring[IGB_MAX_RX_QUEUES];
+
+	struct timer_list watchdog_timer;
+	struct timer_list dma_err_timer;
+	struct timer_list phy_info_timer;
+	u16 mng_vlan_id;
+	u32 bd_number;
+	u32 wol;
+	u32 en_mng_pt;
+	u16 link_speed;
+	u16 link_duplex;
+	u8 port_num;
+
+	/* Interrupt Throttle Rate */
+	u32 rx_itr_setting;
+	u32 tx_itr_setting;
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+	struct work_struct dma_err_task;
+	bool fc_autoneg;
+	u8  tx_timeout_factor;
+
+#ifdef DEBUG
+	bool tx_hang_detected;
+	bool disable_hw_reset;
+#endif
+	u32 max_frame_size;
+
+	/* OS defined structs */
+	struct pci_dev *pdev;
+#ifndef HAVE_NETDEV_STATS_IN_NETDEV
+	struct net_device_stats net_stats;
+#endif
+#ifndef IGB_NO_LRO
+	struct igb_lro_stats lro_stats;
+#endif
+
+	/* structs defined in e1000_hw.h */
+	struct e1000_hw hw;
+	struct e1000_hw_stats stats;
+	struct e1000_phy_info phy_info;
+	struct e1000_phy_stats phy_stats;
+
+#ifdef ETHTOOL_TEST
+	u32 test_icr;
+	struct igb_ring test_tx_ring;
+	struct igb_ring test_rx_ring;
+#endif
+
+	int msg_enable;
+
+	struct igb_q_vector *q_vector[MAX_Q_VECTORS];
+	u32 eims_enable_mask;
+	u32 eims_other;
+
+	/* to not mess up cache alignment, always add to the bottom */
+	u32 *config_space;
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+	struct vf_data_storage *vf_data;
+#ifdef IFLA_VF_MAX
+	int vf_rate_link_speed;
+#endif
+	u32 lli_port;
+	u32 lli_size;
+	unsigned int vfs_allocated_count;
+	/* Malicious Driver Detection flag. Valid only when SR-IOV is enabled */
+	bool mdd;
+	int int_mode;
+	u32 rss_queues;
+	u32 vmdq_pools;
+	char fw_version[32];
+	u32 wvbr;
+	struct igb_mac_addr *mac_table;
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	struct net_device *vmdq_netdev[IGB_MAX_VMDQ_QUEUES];
+#endif
+	int vferr_refcount;
+	int dmac;
+	u32 *shadow_vfta;
+
+	/* External Thermal Sensor support flag */
+	bool ets;
+#ifdef IGB_HWMON
+	struct hwmon_buff igb_hwmon_buff;
+#else /* IGB_HWMON */
+#ifdef IGB_PROCFS
+	struct proc_dir_entry *eth_dir;
+	struct proc_dir_entry *info_dir;
+	struct proc_dir_entry *therm_dir[E1000_MAX_SENSORS];
+	struct igb_therm_proc_data therm_data[E1000_MAX_SENSORS];
+	bool old_lsc;
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+	u32 etrack_id;
+
+#ifdef HAVE_PTP_1588_CLOCK
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	struct delayed_work ptp_overflow_work;
+	struct work_struct ptp_tx_work;
+	struct sk_buff *ptp_tx_skb;
+	unsigned long ptp_tx_start;
+	unsigned long last_rx_ptp_check;
+	spinlock_t tmreg_lock;
+	struct cyclecounter cc;
+	struct timecounter tc;
+	u32 tx_hwtstamp_timeouts;
+	u32 rx_hwtstamp_cleared;
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#ifdef HAVE_I2C_SUPPORT
+	struct i2c_algo_bit_data i2c_algo;
+	struct i2c_adapter i2c_adap;
+	struct i2c_client *i2c_client;
+#endif /* HAVE_I2C_SUPPORT */
+	unsigned long link_check_timeout;
+
+
+	int devrc;
+
+	int copper_tries;
+	u16 eee_advert;
+};
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+struct igb_vmdq_adapter {
+#ifdef HAVE_VLAN_RX_REGISTER
+	/* vlgrp must be first member of structure */
+	struct vlan_group *vlgrp;
+#else
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+#endif
+	struct igb_adapter *real_adapter;
+	struct net_device *vnetdev;
+	struct net_device_stats net_stats;
+	struct igb_ring *tx_ring;
+	struct igb_ring *rx_ring;
+};
+#endif
+
+#define IGB_FLAG_HAS_MSI		(1 << 0)
+#define IGB_FLAG_DCA_ENABLED		(1 << 1)
+#define IGB_FLAG_LLI_PUSH		(1 << 2)
+#define IGB_FLAG_QUAD_PORT_A		(1 << 3)
+#define IGB_FLAG_QUEUE_PAIRS		(1 << 4)
+#define IGB_FLAG_EEE			(1 << 5)
+#define IGB_FLAG_DMAC			(1 << 6)
+#define IGB_FLAG_DETECT_BAD_DMA		(1 << 7)
+#define IGB_FLAG_PTP			(1 << 8)
+#define IGB_FLAG_RSS_FIELD_IPV4_UDP	(1 << 9)
+#define IGB_FLAG_RSS_FIELD_IPV6_UDP	(1 << 10)
+#define IGB_FLAG_WOL_SUPPORTED		(1 << 11)
+#define IGB_FLAG_NEED_LINK_UPDATE	(1 << 12)
+#define IGB_FLAG_LOOPBACK_ENABLE	(1 << 13)
+#define IGB_FLAG_MEDIA_RESET		(1 << 14)
+#define IGB_FLAG_MAS_ENABLE		(1 << 15)
+
+/* Media Auto Sense */
+#define IGB_MAS_ENABLE_0		0X0001
+#define IGB_MAS_ENABLE_1		0X0002
+#define IGB_MAS_ENABLE_2		0X0004
+#define IGB_MAS_ENABLE_3		0X0008
+
+#define IGB_MIN_TXPBSIZE           20408
+#define IGB_TX_BUF_4096            4096
+
+#define IGB_DMCTLX_DCFLUSH_DIS     0x80000000  /* Disable DMA Coal Flush */
+
+/* DMA Coalescing defines */
+#define IGB_DMAC_DISABLE          0
+#define IGB_DMAC_MIN            250
+#define IGB_DMAC_500            500
+#define IGB_DMAC_EN_DEFAULT    1000
+#define IGB_DMAC_2000          2000
+#define IGB_DMAC_3000          3000
+#define IGB_DMAC_4000          4000
+#define IGB_DMAC_5000          5000
+#define IGB_DMAC_6000          6000
+#define IGB_DMAC_7000          7000
+#define IGB_DMAC_8000          8000
+#define IGB_DMAC_9000          9000
+#define IGB_DMAC_MAX          10000
+
+#define IGB_82576_TSYNC_SHIFT 19
+#define IGB_82580_TSYNC_SHIFT 24
+#define IGB_TS_HDR_LEN        16
+
+/* CEM Support */
+#define FW_HDR_LEN           0x4
+#define FW_CMD_DRV_INFO      0xDD
+#define FW_CMD_DRV_INFO_LEN  0x5
+#define FW_CMD_RESERVED      0X0
+#define FW_RESP_SUCCESS      0x1
+#define FW_UNUSED_VER        0x0
+#define FW_MAX_RETRIES       3
+#define FW_STATUS_SUCCESS    0x1
+#define FW_FAMILY_DRV_VER    0Xffffffff
+
+#define IGB_MAX_LINK_TRIES   20
+
+struct e1000_fw_hdr {
+	u8 cmd;
+	u8 buf_len;
+	union
+	{
+		u8 cmd_resv;
+		u8 ret_status;
+	} cmd_or_resp;
+	u8 checksum;
+};
+
+#pragma pack(push,1)
+struct e1000_fw_drv_info {
+	struct e1000_fw_hdr hdr;
+	u8 port_num;
+	u32 drv_version;
+	u16 pad; /* end spacing to ensure length is mult. of dword */
+	u8  pad2; /* end spacing to ensure length is mult. of dword2 */
+};
+#pragma pack(pop)
+
+enum e1000_state_t {
+	__IGB_TESTING,
+	__IGB_RESETTING,
+	__IGB_DOWN
+};
+
+extern char igb_driver_name[];
+extern char igb_driver_version[];
+
+extern int igb_up(struct igb_adapter *);
+extern void igb_down(struct igb_adapter *);
+extern void igb_reinit_locked(struct igb_adapter *);
+extern void igb_reset(struct igb_adapter *);
+extern int igb_set_spd_dplx(struct igb_adapter *, u16);
+extern int igb_setup_tx_resources(struct igb_ring *);
+extern int igb_setup_rx_resources(struct igb_ring *);
+extern void igb_free_tx_resources(struct igb_ring *);
+extern void igb_free_rx_resources(struct igb_ring *);
+extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
+extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
+extern void igb_setup_tctl(struct igb_adapter *);
+extern void igb_setup_rctl(struct igb_adapter *);
+extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
+extern void igb_unmap_and_free_tx_resource(struct igb_ring *,
+                                           struct igb_tx_buffer *);
+extern void igb_alloc_rx_buffers(struct igb_ring *, u16);
+extern void igb_clean_rx_ring(struct igb_ring *);
+extern void igb_update_stats(struct igb_adapter *);
+extern bool igb_has_link(struct igb_adapter *adapter);
+extern void igb_set_ethtool_ops(struct net_device *);
+extern void igb_check_options(struct igb_adapter *);
+extern void igb_power_up_link(struct igb_adapter *);
+#ifdef HAVE_PTP_1588_CLOCK
+extern void igb_ptp_init(struct igb_adapter *adapter);
+extern void igb_ptp_stop(struct igb_adapter *adapter);
+extern void igb_ptp_reset(struct igb_adapter *adapter);
+extern void igb_ptp_tx_work(struct work_struct *work);
+extern void igb_ptp_rx_hang(struct igb_adapter *adapter);
+extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
+extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
+				struct sk_buff *skb);
+extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
+				unsigned char *va,
+				struct sk_buff *skb);
+static inline void igb_ptp_rx_hwtstamp(struct igb_ring *rx_ring,
+				       union e1000_adv_rx_desc *rx_desc,
+				       struct sk_buff *skb)
+{
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
+		skb_pull(skb, IGB_TS_HDR_LEN);
+#endif
+		return;
+	}
+
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS))
+		igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
+	/* Update the last_rx_timestamp timer in order to enable watchdog check
+	 * for error case of latched timestamp on a dropped packet.
+	 */
+	rx_ring->last_rx_timestamp = jiffies;
+}
+
+extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+				  struct ifreq *ifr, int cmd);
+#endif /* HAVE_PTP_1588_CLOCK */
+#ifdef ETHTOOL_OPS_COMPAT
+extern int ethtool_ioctl(struct ifreq *);
+#endif
+extern int igb_write_mc_addr_list(struct net_device *netdev);
+extern int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue);
+extern int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue);
+extern int igb_available_rars(struct igb_adapter *adapter);
+extern s32 igb_vlvf_set(struct igb_adapter *, u32, bool, u32);
+extern void igb_configure_vt_default_pool(struct igb_adapter *adapter);
+extern void igb_enable_vlan_tags(struct igb_adapter *adapter);
+#ifndef HAVE_VLAN_RX_REGISTER
+extern void igb_vlan_mode(struct net_device *, u32);
+#endif
+
+#define E1000_PCS_CFG_IGN_SD	1
+
+#ifdef IGB_HWMON
+void igb_sysfs_exit(struct igb_adapter *adapter);
+int igb_sysfs_init(struct igb_adapter *adapter);
+#else
+#ifdef IGB_PROCFS
+int igb_procfs_init(struct igb_adapter* adapter);
+void igb_procfs_exit(struct igb_adapter* adapter);
+int igb_procfs_topdir_init(void);
+void igb_procfs_topdir_exit(void);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+
+
+
+#endif /* _IGB_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
new file mode 100644
index 00000000..c07f9f53
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
@@ -0,0 +1,28 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
new file mode 100644
index 00000000..af7e68a5
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -0,0 +1,2857 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for igb */
+
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+#include <linux/highmem.h>
+
+#include "igb.h"
+#include "igb_regtest.h"
+#include <linux/if_vlan.h>
+#ifdef ETHTOOL_GEEE
+#include <linux/mdio.h>
+#endif
+
+#ifdef ETHTOOL_OPS_COMPAT
+#include "kcompat_ethtool.c"
+#endif
+#ifdef ETHTOOL_GSTATS
+struct igb_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IGB_STAT(_name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \
+	.stat_offset = offsetof(struct igb_adapter, _stat) \
+}
+static const struct igb_stats igb_gstrings_stats[] = {
+	IGB_STAT("rx_packets", stats.gprc),
+	IGB_STAT("tx_packets", stats.gptc),
+	IGB_STAT("rx_bytes", stats.gorc),
+	IGB_STAT("tx_bytes", stats.gotc),
+	IGB_STAT("rx_broadcast", stats.bprc),
+	IGB_STAT("tx_broadcast", stats.bptc),
+	IGB_STAT("rx_multicast", stats.mprc),
+	IGB_STAT("tx_multicast", stats.mptc),
+	IGB_STAT("multicast", stats.mprc),
+	IGB_STAT("collisions", stats.colc),
+	IGB_STAT("rx_crc_errors", stats.crcerrs),
+	IGB_STAT("rx_no_buffer_count", stats.rnbc),
+	IGB_STAT("rx_missed_errors", stats.mpc),
+	IGB_STAT("tx_aborted_errors", stats.ecol),
+	IGB_STAT("tx_carrier_errors", stats.tncrs),
+	IGB_STAT("tx_window_errors", stats.latecol),
+	IGB_STAT("tx_abort_late_coll", stats.latecol),
+	IGB_STAT("tx_deferred_ok", stats.dc),
+	IGB_STAT("tx_single_coll_ok", stats.scc),
+	IGB_STAT("tx_multi_coll_ok", stats.mcc),
+	IGB_STAT("tx_timeout_count", tx_timeout_count),
+	IGB_STAT("rx_long_length_errors", stats.roc),
+	IGB_STAT("rx_short_length_errors", stats.ruc),
+	IGB_STAT("rx_align_errors", stats.algnerrc),
+	IGB_STAT("tx_tcp_seg_good", stats.tsctc),
+	IGB_STAT("tx_tcp_seg_failed", stats.tsctfc),
+	IGB_STAT("rx_flow_control_xon", stats.xonrxc),
+	IGB_STAT("rx_flow_control_xoff", stats.xoffrxc),
+	IGB_STAT("tx_flow_control_xon", stats.xontxc),
+	IGB_STAT("tx_flow_control_xoff", stats.xofftxc),
+	IGB_STAT("rx_long_byte_count", stats.gorc),
+	IGB_STAT("tx_dma_out_of_sync", stats.doosync),
+#ifndef IGB_NO_LRO
+	IGB_STAT("lro_aggregated", lro_stats.coal),
+	IGB_STAT("lro_flushed", lro_stats.flushed),
+#endif /* IGB_LRO */
+	IGB_STAT("tx_smbus", stats.mgptc),
+	IGB_STAT("rx_smbus", stats.mgprc),
+	IGB_STAT("dropped_smbus", stats.mgpdc),
+	IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+	IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+	IGB_STAT("os2bmc_tx_by_host", stats.o2bspc),
+	IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+#ifdef HAVE_PTP_1588_CLOCK
+	IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+#endif /* HAVE_PTP_1588_CLOCK */
+};
+
+#define IGB_NETDEV_STAT(_net_stat) { \
+	.stat_string = #_net_stat, \
+	.sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
+	.stat_offset = offsetof(struct net_device_stats, _net_stat) \
+}
+static const struct igb_stats igb_gstrings_net_stats[] = {
+	IGB_NETDEV_STAT(rx_errors),
+	IGB_NETDEV_STAT(tx_errors),
+	IGB_NETDEV_STAT(tx_dropped),
+	IGB_NETDEV_STAT(rx_length_errors),
+	IGB_NETDEV_STAT(rx_over_errors),
+	IGB_NETDEV_STAT(rx_frame_errors),
+	IGB_NETDEV_STAT(rx_fifo_errors),
+	IGB_NETDEV_STAT(tx_fifo_errors),
+	IGB_NETDEV_STAT(tx_heartbeat_errors)
+};
+
+#define IGB_GLOBAL_STATS_LEN ARRAY_SIZE(igb_gstrings_stats)
+#define IGB_NETDEV_STATS_LEN ARRAY_SIZE(igb_gstrings_net_stats)
+#define IGB_RX_QUEUE_STATS_LEN \
+	(sizeof(struct igb_rx_queue_stats) / sizeof(u64))
+#define IGB_TX_QUEUE_STATS_LEN \
+	(sizeof(struct igb_tx_queue_stats) / sizeof(u64))
+#define IGB_QUEUE_STATS_LEN \
+	((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \
+	  IGB_RX_QUEUE_STATS_LEN) + \
+	 (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \
+	  IGB_TX_QUEUE_STATS_LEN))
+#define IGB_STATS_LEN \
+	(IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN)
+
+#endif /* ETHTOOL_GSTATS */
+#ifdef ETHTOOL_TEST
+static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)", "Eeprom test    (offline)",
+	"Interrupt test (offline)", "Loopback test  (offline)",
+	"Link test   (on/offline)"
+};
+#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
+#endif /* ETHTOOL_TEST */
+
+static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 status;
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+
+		ecmd->supported = (SUPPORTED_10baseT_Half |
+				   SUPPORTED_10baseT_Full |
+				   SUPPORTED_100baseT_Half |
+				   SUPPORTED_100baseT_Full |
+				   SUPPORTED_1000baseT_Full|
+				   SUPPORTED_Autoneg |
+				   SUPPORTED_TP |
+				   SUPPORTED_Pause);
+		ecmd->advertising = ADVERTISED_TP;
+
+		if (hw->mac.autoneg == 1) {
+			ecmd->advertising |= ADVERTISED_Autoneg;
+			/* the e1000 autoneg seems to match ethtool nicely */
+			ecmd->advertising |= hw->phy.autoneg_advertised;
+		}
+
+		ecmd->port = PORT_TP;
+		ecmd->phy_address = hw->phy.addr;
+		ecmd->transceiver = XCVR_INTERNAL;
+
+	} else {
+		ecmd->supported = (SUPPORTED_1000baseT_Full |
+				   SUPPORTED_100baseT_Full |
+				   SUPPORTED_FIBRE |
+				   SUPPORTED_Autoneg |
+				   SUPPORTED_Pause);
+		if (hw->mac.type == e1000_i354)
+			ecmd->supported |= (SUPPORTED_2500baseX_Full);
+
+		ecmd->advertising = ADVERTISED_FIBRE;
+
+		switch (adapter->link_speed) {
+		case SPEED_2500:
+			ecmd->advertising = ADVERTISED_2500baseX_Full;
+			break;
+		case SPEED_1000:
+			ecmd->advertising = ADVERTISED_1000baseT_Full;
+			break;
+		case SPEED_100:
+			ecmd->advertising = ADVERTISED_100baseT_Full;
+			break;
+		default:
+			break;
+		}
+
+		if (hw->mac.autoneg == 1)
+			ecmd->advertising |= ADVERTISED_Autoneg;
+
+		ecmd->port = PORT_FIBRE;
+		ecmd->transceiver = XCVR_EXTERNAL;
+	}
+
+	if (hw->mac.autoneg != 1)
+		ecmd->advertising &= ~(ADVERTISED_Pause |
+				       ADVERTISED_Asym_Pause);
+
+	if (hw->fc.requested_mode == e1000_fc_full)
+		ecmd->advertising |= ADVERTISED_Pause;
+	else if (hw->fc.requested_mode == e1000_fc_rx_pause)
+		ecmd->advertising |= (ADVERTISED_Pause |
+				      ADVERTISED_Asym_Pause);
+	else if (hw->fc.requested_mode == e1000_fc_tx_pause)
+		ecmd->advertising |=  ADVERTISED_Asym_Pause;
+	else
+		ecmd->advertising &= ~(ADVERTISED_Pause |
+				       ADVERTISED_Asym_Pause);
+
+	status = E1000_READ_REG(hw, E1000_STATUS);
+
+	if (status & E1000_STATUS_LU) {
+		if ((hw->mac.type == e1000_i354) &&
+		    (status & E1000_STATUS_2P5_SKU) &&
+		    !(status & E1000_STATUS_2P5_SKU_OVER))
+			ecmd->speed = SPEED_2500;
+		else if (status & E1000_STATUS_SPEED_1000)
+			ecmd->speed = SPEED_1000;
+		else if (status & E1000_STATUS_SPEED_100)
+			ecmd->speed = SPEED_100;
+		else
+			ecmd->speed = SPEED_10;
+
+		if ((status & E1000_STATUS_FD) ||
+		    hw->phy.media_type != e1000_media_type_copper)
+			ecmd->duplex = DUPLEX_FULL;
+		else
+			ecmd->duplex = DUPLEX_HALF;
+
+	} else {
+		ecmd->speed = -1;
+		ecmd->duplex = -1;
+	}
+
+	if ((hw->phy.media_type == e1000_media_type_fiber) ||
+	    hw->mac.autoneg)
+		ecmd->autoneg = AUTONEG_ENABLE;
+	else
+		ecmd->autoneg = AUTONEG_DISABLE;
+#ifdef ETH_TP_MDI_X
+
+	/* MDI-X => 2; MDI =>1; Invalid =>0 */
+	if (hw->phy.media_type == e1000_media_type_copper)
+		ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+						      ETH_TP_MDI;
+	else
+		ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+#ifdef ETH_TP_MDI_AUTO
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		ecmd->eth_tp_mdix_ctrl = hw->phy.mdix;
+
+#endif
+#endif /* ETH_TP_MDI_X */
+	return 0;
+}
+
+static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (ecmd->duplex  == DUPLEX_HALF) {
+		if (!hw->dev_spec._82575.eee_disable)
+			dev_info(pci_dev_to_dev(adapter->pdev), "EEE disabled: not supported with half duplex\n");
+		hw->dev_spec._82575.eee_disable = true;
+	} else {
+		if (hw->dev_spec._82575.eee_disable)
+			dev_info(pci_dev_to_dev(adapter->pdev), "EEE enabled\n");
+		hw->dev_spec._82575.eee_disable = false;
+	}
+
+	/* When SoL/IDER sessions are active, autoneg/speed/duplex
+	 * cannot be changed */
+	if (e1000_check_reset_block(hw)) {
+		dev_err(pci_dev_to_dev(adapter->pdev), "Cannot change link "
+			"characteristics when SoL/IDER is active.\n");
+		return -EINVAL;
+	}
+
+#ifdef ETH_TP_MDI_AUTO
+	/*
+	 * MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		if (hw->phy.media_type != e1000_media_type_copper)
+			return -EOPNOTSUPP;
+
+		if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (ecmd->autoneg != AUTONEG_ENABLE)) {
+			dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
+#endif /* ETH_TP_MDI_AUTO */
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (ecmd->autoneg == AUTONEG_ENABLE) {
+		hw->mac.autoneg = 1;
+		if (hw->phy.media_type == e1000_media_type_fiber) {
+			hw->phy.autoneg_advertised = ecmd->advertising |
+						     ADVERTISED_FIBRE |
+						     ADVERTISED_Autoneg;
+			switch (adapter->link_speed) {
+			case SPEED_2500:
+				hw->phy.autoneg_advertised =
+					ADVERTISED_2500baseX_Full;
+				break;
+			case SPEED_1000:
+				hw->phy.autoneg_advertised =
+					ADVERTISED_1000baseT_Full;
+				break;
+			case SPEED_100:
+				hw->phy.autoneg_advertised =
+					ADVERTISED_100baseT_Full;
+				break;
+			default:
+				break;
+			}
+		} else {
+			hw->phy.autoneg_advertised = ecmd->advertising |
+						     ADVERTISED_TP |
+						     ADVERTISED_Autoneg;
+		}
+		ecmd->advertising = hw->phy.autoneg_advertised;
+		if (adapter->fc_autoneg)
+			hw->fc.requested_mode = e1000_fc_default;
+	} else {
+		if (igb_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) {
+			clear_bit(__IGB_RESETTING, &adapter->state);
+			return -EINVAL;
+		}
+	}
+
+#ifdef ETH_TP_MDI_AUTO
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (ecmd->eth_tp_mdix_ctrl) {
+		/* fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = ecmd->eth_tp_mdix_ctrl;
+	}
+
+#endif /* ETH_TP_MDI_AUTO */
+	/* reset the link */
+	if (netif_running(adapter->netdev)) {
+		igb_down(adapter);
+		igb_up(adapter);
+	} else
+		igb_reset(adapter);
+
+	clear_bit(__IGB_RESETTING, &adapter->state);
+	return 0;
+}
+
+static u32 igb_get_link(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+
+	/*
+	 * If the link is not reported up to netdev, interrupts are disabled,
+	 * and so the physical link state may have changed since we last
+	 * looked. Set get_link_status to make sure that the true link
+	 * state is interrogated, rather than pulling a cached and possibly
+	 * stale link state from the driver.
+	 */
+	if (!netif_carrier_ok(netdev))
+		mac->get_link_status = 1;
+
+	return igb_has_link(adapter);
+}
+
+static void igb_get_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+		(adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc.current_mode == e1000_fc_rx_pause)
+		pause->rx_pause = 1;
+	else if (hw->fc.current_mode == e1000_fc_tx_pause)
+		pause->tx_pause = 1;
+	else if (hw->fc.current_mode == e1000_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int igb_set_pauseparam(struct net_device *netdev,
+			      struct ethtool_pauseparam *pause)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		hw->fc.requested_mode = e1000_fc_default;
+		if (netif_running(adapter->netdev)) {
+			igb_down(adapter);
+			igb_up(adapter);
+		} else {
+			igb_reset(adapter);
+		}
+	} else {
+		if (pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_full;
+		else if (pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_rx_pause;
+		else if (!pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_tx_pause;
+		else if (!pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_none;
+
+		hw->fc.current_mode = hw->fc.requested_mode;
+
+		if (hw->phy.media_type == e1000_media_type_fiber) {
+			retval = hw->mac.ops.setup_link(hw);
+			/* implicit goto out */
+		} else {
+			retval = e1000_force_mac_fc(hw);
+			if (retval)
+				goto out;
+			e1000_set_fc_watermarks_generic(hw);
+		}
+	}
+
+out:
+	clear_bit(__IGB_RESETTING, &adapter->state);
+	return retval;
+}
+
+static u32 igb_get_msglevel(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	return adapter->msg_enable;
+}
+
+static void igb_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	adapter->msg_enable = data;
+}
+
+static int igb_get_regs_len(struct net_device *netdev)
+{
+#define IGB_REGS_LEN 555
+	return IGB_REGS_LEN * sizeof(u32);
+}
+
+static void igb_get_regs(struct net_device *netdev,
+			 struct ethtool_regs *regs, void *p)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	memset(p, 0, IGB_REGS_LEN * sizeof(u32));
+
+	regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL);
+	regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS);
+	regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	regs_buff[3] = E1000_READ_REG(hw, E1000_MDIC);
+	regs_buff[4] = E1000_READ_REG(hw, E1000_SCTL);
+	regs_buff[5] = E1000_READ_REG(hw, E1000_CONNSW);
+	regs_buff[6] = E1000_READ_REG(hw, E1000_VET);
+	regs_buff[7] = E1000_READ_REG(hw, E1000_LEDCTL);
+	regs_buff[8] = E1000_READ_REG(hw, E1000_PBA);
+	regs_buff[9] = E1000_READ_REG(hw, E1000_PBS);
+	regs_buff[10] = E1000_READ_REG(hw, E1000_FRTIMER);
+	regs_buff[11] = E1000_READ_REG(hw, E1000_TCPTIMER);
+
+	/* NVM Register */
+	regs_buff[12] = E1000_READ_REG(hw, E1000_EECD);
+
+	/* Interrupt */
+	/* Reading EICS for EICR because they read the
+	 * same but EICS does not clear on read */
+	regs_buff[13] = E1000_READ_REG(hw, E1000_EICS);
+	regs_buff[14] = E1000_READ_REG(hw, E1000_EICS);
+	regs_buff[15] = E1000_READ_REG(hw, E1000_EIMS);
+	regs_buff[16] = E1000_READ_REG(hw, E1000_EIMC);
+	regs_buff[17] = E1000_READ_REG(hw, E1000_EIAC);
+	regs_buff[18] = E1000_READ_REG(hw, E1000_EIAM);
+	/* Reading ICS for ICR because they read the
+	 * same but ICS does not clear on read */
+	regs_buff[19] = E1000_READ_REG(hw, E1000_ICS);
+	regs_buff[20] = E1000_READ_REG(hw, E1000_ICS);
+	regs_buff[21] = E1000_READ_REG(hw, E1000_IMS);
+	regs_buff[22] = E1000_READ_REG(hw, E1000_IMC);
+	regs_buff[23] = E1000_READ_REG(hw, E1000_IAC);
+	regs_buff[24] = E1000_READ_REG(hw, E1000_IAM);
+	regs_buff[25] = E1000_READ_REG(hw, E1000_IMIRVP);
+
+	/* Flow Control */
+	regs_buff[26] = E1000_READ_REG(hw, E1000_FCAL);
+	regs_buff[27] = E1000_READ_REG(hw, E1000_FCAH);
+	regs_buff[28] = E1000_READ_REG(hw, E1000_FCTTV);
+	regs_buff[29] = E1000_READ_REG(hw, E1000_FCRTL);
+	regs_buff[30] = E1000_READ_REG(hw, E1000_FCRTH);
+	regs_buff[31] = E1000_READ_REG(hw, E1000_FCRTV);
+
+	/* Receive */
+	regs_buff[32] = E1000_READ_REG(hw, E1000_RCTL);
+	regs_buff[33] = E1000_READ_REG(hw, E1000_RXCSUM);
+	regs_buff[34] = E1000_READ_REG(hw, E1000_RLPML);
+	regs_buff[35] = E1000_READ_REG(hw, E1000_RFCTL);
+	regs_buff[36] = E1000_READ_REG(hw, E1000_MRQC);
+	regs_buff[37] = E1000_READ_REG(hw, E1000_VT_CTL);
+
+	/* Transmit */
+	regs_buff[38] = E1000_READ_REG(hw, E1000_TCTL);
+	regs_buff[39] = E1000_READ_REG(hw, E1000_TCTL_EXT);
+	regs_buff[40] = E1000_READ_REG(hw, E1000_TIPG);
+	regs_buff[41] = E1000_READ_REG(hw, E1000_DTXCTL);
+
+	/* Wake Up */
+	regs_buff[42] = E1000_READ_REG(hw, E1000_WUC);
+	regs_buff[43] = E1000_READ_REG(hw, E1000_WUFC);
+	regs_buff[44] = E1000_READ_REG(hw, E1000_WUS);
+	regs_buff[45] = E1000_READ_REG(hw, E1000_IPAV);
+	regs_buff[46] = E1000_READ_REG(hw, E1000_WUPL);
+
+	/* MAC */
+	regs_buff[47] = E1000_READ_REG(hw, E1000_PCS_CFG0);
+	regs_buff[48] = E1000_READ_REG(hw, E1000_PCS_LCTL);
+	regs_buff[49] = E1000_READ_REG(hw, E1000_PCS_LSTAT);
+	regs_buff[50] = E1000_READ_REG(hw, E1000_PCS_ANADV);
+	regs_buff[51] = E1000_READ_REG(hw, E1000_PCS_LPAB);
+	regs_buff[52] = E1000_READ_REG(hw, E1000_PCS_NPTX);
+	regs_buff[53] = E1000_READ_REG(hw, E1000_PCS_LPABNP);
+
+	/* Statistics */
+	regs_buff[54] = adapter->stats.crcerrs;
+	regs_buff[55] = adapter->stats.algnerrc;
+	regs_buff[56] = adapter->stats.symerrs;
+	regs_buff[57] = adapter->stats.rxerrc;
+	regs_buff[58] = adapter->stats.mpc;
+	regs_buff[59] = adapter->stats.scc;
+	regs_buff[60] = adapter->stats.ecol;
+	regs_buff[61] = adapter->stats.mcc;
+	regs_buff[62] = adapter->stats.latecol;
+	regs_buff[63] = adapter->stats.colc;
+	regs_buff[64] = adapter->stats.dc;
+	regs_buff[65] = adapter->stats.tncrs;
+	regs_buff[66] = adapter->stats.sec;
+	regs_buff[67] = adapter->stats.htdpmc;
+	regs_buff[68] = adapter->stats.rlec;
+	regs_buff[69] = adapter->stats.xonrxc;
+	regs_buff[70] = adapter->stats.xontxc;
+	regs_buff[71] = adapter->stats.xoffrxc;
+	regs_buff[72] = adapter->stats.xofftxc;
+	regs_buff[73] = adapter->stats.fcruc;
+	regs_buff[74] = adapter->stats.prc64;
+	regs_buff[75] = adapter->stats.prc127;
+	regs_buff[76] = adapter->stats.prc255;
+	regs_buff[77] = adapter->stats.prc511;
+	regs_buff[78] = adapter->stats.prc1023;
+	regs_buff[79] = adapter->stats.prc1522;
+	regs_buff[80] = adapter->stats.gprc;
+	regs_buff[81] = adapter->stats.bprc;
+	regs_buff[82] = adapter->stats.mprc;
+	regs_buff[83] = adapter->stats.gptc;
+	regs_buff[84] = adapter->stats.gorc;
+	regs_buff[86] = adapter->stats.gotc;
+	regs_buff[88] = adapter->stats.rnbc;
+	regs_buff[89] = adapter->stats.ruc;
+	regs_buff[90] = adapter->stats.rfc;
+	regs_buff[91] = adapter->stats.roc;
+	regs_buff[92] = adapter->stats.rjc;
+	regs_buff[93] = adapter->stats.mgprc;
+	regs_buff[94] = adapter->stats.mgpdc;
+	regs_buff[95] = adapter->stats.mgptc;
+	regs_buff[96] = adapter->stats.tor;
+	regs_buff[98] = adapter->stats.tot;
+	regs_buff[100] = adapter->stats.tpr;
+	regs_buff[101] = adapter->stats.tpt;
+	regs_buff[102] = adapter->stats.ptc64;
+	regs_buff[103] = adapter->stats.ptc127;
+	regs_buff[104] = adapter->stats.ptc255;
+	regs_buff[105] = adapter->stats.ptc511;
+	regs_buff[106] = adapter->stats.ptc1023;
+	regs_buff[107] = adapter->stats.ptc1522;
+	regs_buff[108] = adapter->stats.mptc;
+	regs_buff[109] = adapter->stats.bptc;
+	regs_buff[110] = adapter->stats.tsctc;
+	regs_buff[111] = adapter->stats.iac;
+	regs_buff[112] = adapter->stats.rpthc;
+	regs_buff[113] = adapter->stats.hgptc;
+	regs_buff[114] = adapter->stats.hgorc;
+	regs_buff[116] = adapter->stats.hgotc;
+	regs_buff[118] = adapter->stats.lenerrs;
+	regs_buff[119] = adapter->stats.scvpc;
+	regs_buff[120] = adapter->stats.hrmpc;
+
+	for (i = 0; i < 4; i++)
+		regs_buff[121 + i] = E1000_READ_REG(hw, E1000_SRRCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[125 + i] = E1000_READ_REG(hw, E1000_PSRTYPE(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[129 + i] = E1000_READ_REG(hw, E1000_RDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[133 + i] = E1000_READ_REG(hw, E1000_RDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[137 + i] = E1000_READ_REG(hw, E1000_RDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[141 + i] = E1000_READ_REG(hw, E1000_RDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[145 + i] = E1000_READ_REG(hw, E1000_RDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[149 + i] = E1000_READ_REG(hw, E1000_RXDCTL(i));
+
+	for (i = 0; i < 10; i++)
+		regs_buff[153 + i] = E1000_READ_REG(hw, E1000_EITR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[163 + i] = E1000_READ_REG(hw, E1000_IMIR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[171 + i] = E1000_READ_REG(hw, E1000_IMIREXT(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[179 + i] = E1000_READ_REG(hw, E1000_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[195 + i] = E1000_READ_REG(hw, E1000_RAH(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[211 + i] = E1000_READ_REG(hw, E1000_TDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[215 + i] = E1000_READ_REG(hw, E1000_TDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[219 + i] = E1000_READ_REG(hw, E1000_TDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[223 + i] = E1000_READ_REG(hw, E1000_TDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[227 + i] = E1000_READ_REG(hw, E1000_TDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[231 + i] = E1000_READ_REG(hw, E1000_TXDCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[235 + i] = E1000_READ_REG(hw, E1000_TDWBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[239 + i] = E1000_READ_REG(hw, E1000_TDWBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[243 + i] = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[247 + i] = E1000_READ_REG(hw, E1000_IP4AT_REG(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[251 + i] = E1000_READ_REG(hw, E1000_IP6AT_REG(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[255 + i] = E1000_READ_REG(hw, E1000_WUPM_REG(i));
+	for (i = 0; i < 128; i++)
+		regs_buff[287 + i] = E1000_READ_REG(hw, E1000_FFMT_REG(i));
+	for (i = 0; i < 128; i++)
+		regs_buff[415 + i] = E1000_READ_REG(hw, E1000_FFVT_REG(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[543 + i] = E1000_READ_REG(hw, E1000_FFLT_REG(i));
+
+	regs_buff[547] = E1000_READ_REG(hw, E1000_TDFH);
+	regs_buff[548] = E1000_READ_REG(hw, E1000_TDFT);
+	regs_buff[549] = E1000_READ_REG(hw, E1000_TDFHS);
+	regs_buff[550] = E1000_READ_REG(hw, E1000_TDFPC);
+	if (hw->mac.type > e1000_82580) {
+		regs_buff[551] = adapter->stats.o2bgptc;
+		regs_buff[552] = adapter->stats.b2ospc;
+		regs_buff[553] = adapter->stats.o2bspc;
+		regs_buff[554] = adapter->stats.b2ogprc;
+	}
+}
+
+static int igb_get_eeprom_len(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	return adapter->hw.nvm.word_size * 2;
+}
+
+static int igb_get_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	int first_word, last_word;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc(sizeof(u16) *
+			(last_word - first_word + 1), GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	if (hw->nvm.type == e1000_nvm_eeprom_spi)
+		ret_val = e1000_read_nvm(hw, first_word,
+					 last_word - first_word + 1,
+					 eeprom_buff);
+	else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = e1000_read_nvm(hw, first_word + i, 1,
+						 &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		eeprom_buff[i] = le16_to_cpu(eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+			eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int igb_set_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = hw->nvm.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word */
+		/* only the second byte of the word is being modified */
+		ret_val = e1000_read_nvm(hw, first_word, 1,
+					    &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+		/* need read/modify/write of last changed EEPROM word */
+		/* only the first byte of the word is being modified */
+		ret_val = e1000_read_nvm(hw, last_word, 1,
+			  &eeprom_buff[last_word - first_word]);
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = e1000_write_nvm(hw, first_word,
+				  last_word - first_word + 1, eeprom_buff);
+
+	/* Update the checksum if write succeeded.
+	 * and flush shadow RAM for 82573 controllers */
+	if (ret_val == 0)
+		e1000_update_nvm_checksum(hw);
+
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void igb_get_drvinfo(struct net_device *netdev,
+			    struct ethtool_drvinfo *drvinfo)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	strncpy(drvinfo->driver,  igb_driver_name, sizeof(drvinfo->driver) - 1);
+	strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1);
+
+	strncpy(drvinfo->fw_version, adapter->fw_version,
+		sizeof(drvinfo->fw_version) - 1);
+	strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1);
+	drvinfo->n_stats = IGB_STATS_LEN;
+	drvinfo->testinfo_len = IGB_TEST_LEN;
+	drvinfo->regdump_len = igb_get_regs_len(netdev);
+	drvinfo->eedump_len = igb_get_eeprom_len(netdev);
+}
+
+static void igb_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IGB_MAX_RXD;
+	ring->tx_max_pending = IGB_MAX_TXD;
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static int igb_set_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct igb_ring *temp_ring;
+	int i, err = 0;
+	u16 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_rx_count = min(ring->rx_pending, (u32)IGB_MAX_RXD);
+	new_rx_count = max(new_rx_count, (u16)IGB_MIN_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = min(ring->tx_pending, (u32)IGB_MAX_TXD);
+	new_tx_count = max(new_tx_count, (u16)IGB_MIN_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if ((new_tx_count == adapter->tx_ring_count) &&
+	    (new_rx_count == adapter->rx_ring_count)) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	if (adapter->num_tx_queues > adapter->num_rx_queues)
+		temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring));
+	else
+		temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	igb_down(adapter);
+
+	/*
+	 * We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the tx and rx ring structs.
+	 */
+	if (new_tx_count != adapter->tx_ring_count) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->tx_ring[i],
+			       sizeof(struct igb_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = igb_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igb_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			igb_free_tx_resources(adapter->tx_ring[i]);
+
+			memcpy(adapter->tx_ring[i], &temp_ring[i],
+			       sizeof(struct igb_ring));
+		}
+
+		adapter->tx_ring_count = new_tx_count;
+	}
+
+	if (new_rx_count != adapter->rx_ring_count) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->rx_ring[i],
+			       sizeof(struct igb_ring));
+
+			temp_ring[i].count = new_rx_count;
+			err = igb_setup_rx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igb_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			igb_free_rx_resources(adapter->rx_ring[i]);
+
+			memcpy(adapter->rx_ring[i], &temp_ring[i],
+			       sizeof(struct igb_ring));
+		}
+
+		adapter->rx_ring_count = new_rx_count;
+	}
+err_setup:
+	igb_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IGB_RESETTING, &adapter->state);
+	return err;
+}
+static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data,
+			     int reg, u32 mask, u32 write)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 pat, val;
+	static const u32 _test[] =
+		{0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+	for (pat = 0; pat < ARRAY_SIZE(_test); pat++) {
+		E1000_WRITE_REG(hw, reg, (_test[pat] & write));
+		val = E1000_READ_REG(hw, reg) & mask;
+		if (val != (_test[pat] & write & mask)) {
+			dev_err(pci_dev_to_dev(adapter->pdev), "pattern test reg %04X "
+				"failed: got 0x%08X expected 0x%08X\n",
+			        E1000_REGISTER(hw, reg), val, (_test[pat] & write & mask));
+			*data = E1000_REGISTER(hw, reg);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data,
+			      int reg, u32 mask, u32 write)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 val;
+	E1000_WRITE_REG(hw, reg, write & mask);
+	val = E1000_READ_REG(hw, reg);
+	if ((write & mask) != (val & mask)) {
+		dev_err(pci_dev_to_dev(adapter->pdev), "set/check reg %04X test failed:"
+			" got 0x%08X expected 0x%08X\n", reg,
+			(val & mask), (write & mask));
+		*data = E1000_REGISTER(hw, reg);
+		return 1;
+	}
+
+	return 0;
+}
+
+#define REG_PATTERN_TEST(reg, mask, write) \
+	do { \
+		if (reg_pattern_test(adapter, data, reg, mask, write)) \
+			return 1; \
+	} while (0)
+
+#define REG_SET_AND_CHECK(reg, mask, write) \
+	do { \
+		if (reg_set_and_check(adapter, data, reg, mask, write)) \
+			return 1; \
+	} while (0)
+
+static int igb_reg_test(struct igb_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_reg_test *test;
+	u32 value, before, after;
+	u32 i, toggle;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_i350:
+	case e1000_i354:
+		test = reg_test_i350;
+		toggle = 0x7FEFF3FF;
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		test = reg_test_i210;
+		toggle = 0x7FEFF3FF;
+		break;
+	case e1000_82580:
+		test = reg_test_82580;
+		toggle = 0x7FEFF3FF;
+		break;
+	case e1000_82576:
+		test = reg_test_82576;
+		toggle = 0x7FFFF3FF;
+		break;
+	default:
+		test = reg_test_82575;
+		toggle = 0x7FFFF3FF;
+		break;
+	}
+
+	/* Because the status register is such a special case,
+	 * we handle it separately from the rest of the register
+	 * tests.  Some bits are read-only, some toggle, and some
+	 * are writable on newer MACs.
+	 */
+	before = E1000_READ_REG(hw, E1000_STATUS);
+	value = (E1000_READ_REG(hw, E1000_STATUS) & toggle);
+	E1000_WRITE_REG(hw, E1000_STATUS, toggle);
+	after = E1000_READ_REG(hw, E1000_STATUS) & toggle;
+	if (value != after) {
+		dev_err(pci_dev_to_dev(adapter->pdev), "failed STATUS register test "
+			"got: 0x%08X expected: 0x%08X\n", after, value);
+		*data = 1;
+		return 1;
+	}
+	/* restore previous status */
+	E1000_WRITE_REG(hw, E1000_STATUS, before);
+
+	/* Perform the remainder of the register test, looping through
+	 * the test table until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				REG_PATTERN_TEST(test->reg +
+						(i * test->reg_offset),
+						test->mask,
+						test->write);
+				break;
+			case SET_READ_TEST:
+				REG_SET_AND_CHECK(test->reg +
+						(i * test->reg_offset),
+						test->mask,
+						test->write);
+				break;
+			case WRITE_NO_TEST:
+				writel(test->write,
+				       (adapter->hw.hw_addr + test->reg)
+					+ (i * test->reg_offset));
+				break;
+			case TABLE32_TEST:
+				REG_PATTERN_TEST(test->reg + (i * 4),
+						test->mask,
+						test->write);
+				break;
+			case TABLE64_TEST_LO:
+				REG_PATTERN_TEST(test->reg + (i * 8),
+						test->mask,
+						test->write);
+				break;
+			case TABLE64_TEST_HI:
+				REG_PATTERN_TEST((test->reg + 4) + (i * 8),
+						test->mask,
+						test->write);
+				break;
+			}
+		}
+		test++;
+	}
+
+	*data = 0;
+	return 0;
+}
+
+static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data)
+{
+	*data = 0;
+
+	/* Validate NVM checksum */
+	if (e1000_validate_nvm_checksum(&adapter->hw) < 0)
+		*data = 2;
+
+	return *data;
+}
+
+static irqreturn_t igb_test_intr(int irq, void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *) data;
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR);
+
+	return IRQ_HANDLED;
+}
+
+static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 mask, ics_mask, i = 0, shared_int = TRUE;
+	u32 irq = adapter->pdev->irq;
+
+	*data = 0;
+
+	/* Hook up test interrupt handler just for this test */
+	if (adapter->msix_entries) {
+		if (request_irq(adapter->msix_entries[0].vector,
+		                &igb_test_intr, 0, netdev->name, adapter)) {
+			*data = 1;
+			return -1;
+		}
+	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
+		shared_int = FALSE;
+		if (request_irq(irq,
+		                igb_test_intr, 0, netdev->name, adapter)) {
+			*data = 1;
+			return -1;
+		}
+	} else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED,
+				netdev->name, adapter)) {
+		shared_int = FALSE;
+	} else if (request_irq(irq, &igb_test_intr, IRQF_SHARED,
+		 netdev->name, adapter)) {
+		*data = 1;
+		return -1;
+	}
+	dev_info(pci_dev_to_dev(adapter->pdev), "testing %s interrupt\n",
+		 (shared_int ? "shared" : "unshared"));
+
+	/* Disable all the interrupts */
+	E1000_WRITE_REG(hw, E1000_IMC, ~0);
+	E1000_WRITE_FLUSH(hw);
+	usleep_range(10000, 20000);
+
+	/* Define all writable bits for ICS */
+	switch (hw->mac.type) {
+	case e1000_82575:
+		ics_mask = 0x37F47EDD;
+		break;
+	case e1000_82576:
+		ics_mask = 0x77D4FBFD;
+		break;
+	case e1000_82580:
+		ics_mask = 0x77DCFED5;
+		break;
+	case e1000_i350:
+	case e1000_i354:
+		ics_mask = 0x77DCFED5;
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		ics_mask = 0x774CFED5;
+		break;
+	default:
+		ics_mask = 0x7FFFFFFF;
+		break;
+	}
+
+	/* Test each interrupt */
+	for (; i < 31; i++) {
+		/* Interrupt to test */
+		mask = 1 << i;
+
+		if (!(mask & ics_mask))
+			continue;
+
+		if (!shared_int) {
+			/* Disable the interrupt to be reported in
+			 * the cause register and then force the same
+			 * interrupt and see if one gets posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+
+			/* Flush any pending interrupts */
+			E1000_WRITE_REG(hw, E1000_ICR, ~0);
+
+			E1000_WRITE_REG(hw, E1000_IMC, mask);
+			E1000_WRITE_REG(hw, E1000_ICS, mask);
+			E1000_WRITE_FLUSH(hw);
+			usleep_range(10000, 20000);
+
+			if (adapter->test_icr & mask) {
+				*data = 3;
+				break;
+			}
+		}
+
+		/* Enable the interrupt to be reported in
+		 * the cause register and then force the same
+		 * interrupt and see if one gets posted.  If
+		 * an interrupt was not posted to the bus, the
+		 * test failed.
+		 */
+		adapter->test_icr = 0;
+
+		/* Flush any pending interrupts */
+		E1000_WRITE_REG(hw, E1000_ICR, ~0);
+
+		E1000_WRITE_REG(hw, E1000_IMS, mask);
+		E1000_WRITE_REG(hw, E1000_ICS, mask);
+		E1000_WRITE_FLUSH(hw);
+		usleep_range(10000, 20000);
+
+		if (!(adapter->test_icr & mask)) {
+			*data = 4;
+			break;
+		}
+
+		if (!shared_int) {
+			/* Disable the other interrupts to be reported in
+			 * the cause register and then force the other
+			 * interrupts and see if any get posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+
+			/* Flush any pending interrupts */
+			E1000_WRITE_REG(hw, E1000_ICR, ~0);
+
+			E1000_WRITE_REG(hw, E1000_IMC, ~mask);
+			E1000_WRITE_REG(hw, E1000_ICS, ~mask);
+			E1000_WRITE_FLUSH(hw);
+			usleep_range(10000, 20000);
+
+			if (adapter->test_icr & mask) {
+				*data = 5;
+				break;
+			}
+		}
+	}
+
+	/* Disable all the interrupts */
+	E1000_WRITE_REG(hw, E1000_IMC, ~0);
+	E1000_WRITE_FLUSH(hw);
+	usleep_range(10000, 20000);
+
+	/* Unhook test interrupt handler */
+	if (adapter->msix_entries)
+		free_irq(adapter->msix_entries[0].vector, adapter);
+	else
+		free_irq(irq, adapter);
+
+	return *data;
+}
+
+static void igb_free_desc_rings(struct igb_adapter *adapter)
+{
+	igb_free_tx_resources(&adapter->test_tx_ring);
+	igb_free_rx_resources(&adapter->test_rx_ring);
+}
+
+static int igb_setup_desc_rings(struct igb_adapter *adapter)
+{
+	struct igb_ring *tx_ring = &adapter->test_tx_ring;
+	struct igb_ring *rx_ring = &adapter->test_rx_ring;
+	struct e1000_hw *hw = &adapter->hw;
+	int ret_val;
+
+	/* Setup Tx descriptor ring and Tx buffers */
+	tx_ring->count = IGB_DEFAULT_TXD;
+	tx_ring->dev = pci_dev_to_dev(adapter->pdev);
+	tx_ring->netdev = adapter->netdev;
+	tx_ring->reg_idx = adapter->vfs_allocated_count;
+
+	if (igb_setup_tx_resources(tx_ring)) {
+		ret_val = 1;
+		goto err_nomem;
+	}
+
+	igb_setup_tctl(adapter);
+	igb_configure_tx_ring(adapter, tx_ring);
+
+	/* Setup Rx descriptor ring and Rx buffers */
+	rx_ring->count = IGB_DEFAULT_RXD;
+	rx_ring->dev = pci_dev_to_dev(adapter->pdev);
+	rx_ring->netdev = adapter->netdev;
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	rx_ring->rx_buffer_len = IGB_RX_HDR_LEN;
+#endif
+	rx_ring->reg_idx = adapter->vfs_allocated_count;
+
+	if (igb_setup_rx_resources(rx_ring)) {
+		ret_val = 2;
+		goto err_nomem;
+	}
+
+	/* set the default queue to queue 0 of PF */
+	E1000_WRITE_REG(hw, E1000_MRQC, adapter->vfs_allocated_count << 3);
+
+	/* enable receive ring */
+	igb_setup_rctl(adapter);
+	igb_configure_rx_ring(adapter, rx_ring);
+
+	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+	return 0;
+
+err_nomem:
+	igb_free_desc_rings(adapter);
+	return ret_val;
+}
+
+static void igb_phy_disable_receiver(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Write out to PHY registers 29 and 30 to disable the Receiver. */
+	e1000_write_phy_reg(hw, 29, 0x001F);
+	e1000_write_phy_reg(hw, 30, 0x8FFC);
+	e1000_write_phy_reg(hw, 29, 0x001A);
+	e1000_write_phy_reg(hw, 30, 0x8FF0);
+}
+
+static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_reg = 0;
+
+	hw->mac.autoneg = FALSE;
+
+	if (hw->phy.type == e1000_phy_m88) {
+		if (hw->phy.id != I210_I_PHY_ID) {
+			/* Auto-MDI/MDIX Off */
+			e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+			/* reset to update Auto-MDI/MDIX */
+			e1000_write_phy_reg(hw, PHY_CONTROL, 0x9140);
+			/* autoneg off */
+			e1000_write_phy_reg(hw, PHY_CONTROL, 0x8140);
+		} else {
+			/* force 1000, set loopback  */
+			e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+			e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+		}
+	} else {
+		/* enable MII loopback */
+		if (hw->phy.type == e1000_phy_82580)
+			e1000_write_phy_reg(hw, I82577_PHY_LBK_CTRL, 0x8041);
+	}
+
+	/* force 1000, set loopback  */
+	e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+
+	/* Now set up the MAC to the same speed/duplex as the PHY. */
+	ctrl_reg = E1000_READ_REG(hw, E1000_CTRL);
+	ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
+	ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
+		     E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
+		     E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
+		     E1000_CTRL_FD |	 /* Force Duplex to FULL */
+		     E1000_CTRL_SLU);	 /* Set link up enable bit */
+
+	if (hw->phy.type == e1000_phy_m88)
+		ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
+
+	E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg);
+
+	/* Disable the receiver on the PHY so when a cable is plugged in, the
+	 * PHY does not begin to autoneg when a cable is reconnected to the NIC.
+	 */
+	if (hw->phy.type == e1000_phy_m88)
+		igb_phy_disable_receiver(adapter);
+
+	mdelay(500);
+	return 0;
+}
+
+static int igb_set_phy_loopback(struct igb_adapter *adapter)
+{
+	return igb_integrated_phy_loopback(adapter);
+}
+
+static int igb_setup_loopback_test(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg;
+
+	reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+
+	/* use CTRL_EXT to identify link type as SGMII can appear as copper */
+	if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) {
+                if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
+                    (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
+                    (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
+                    (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) {
+
+                        /* Enable DH89xxCC MPHY for near end loopback */
+                        reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL);
+                        reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
+                                E1000_MPHY_PCS_CLK_REG_OFFSET;
+                        E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg);
+
+                        reg = E1000_READ_REG(hw, E1000_MPHY_DATA);
+                        reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
+                        E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg);
+                }
+
+		reg = E1000_READ_REG(hw, E1000_RCTL);
+		reg |= E1000_RCTL_LBM_TCVR;
+		E1000_WRITE_REG(hw, E1000_RCTL, reg);
+
+		E1000_WRITE_REG(hw, E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK);
+
+		reg = E1000_READ_REG(hw, E1000_CTRL);
+		reg &= ~(E1000_CTRL_RFCE |
+			 E1000_CTRL_TFCE |
+			 E1000_CTRL_LRST);
+		reg |= E1000_CTRL_SLU |
+		       E1000_CTRL_FD;
+		E1000_WRITE_REG(hw, E1000_CTRL, reg);
+
+		/* Unset switch control to serdes energy detect */
+		reg = E1000_READ_REG(hw, E1000_CONNSW);
+		reg &= ~E1000_CONNSW_ENRGSRC;
+		E1000_WRITE_REG(hw, E1000_CONNSW, reg);
+
+		/* Unset sigdetect for SERDES loopback on
+		 * 82580 and newer devices
+		 */
+		if (hw->mac.type >= e1000_82580) {
+			reg = E1000_READ_REG(hw, E1000_PCS_CFG0);
+			reg |= E1000_PCS_CFG_IGN_SD;
+			E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg);
+		}
+
+		/* Set PCS register for forced speed */
+		reg = E1000_READ_REG(hw, E1000_PCS_LCTL);
+		reg &= ~E1000_PCS_LCTL_AN_ENABLE;     /* Disable Autoneg*/
+		reg |= E1000_PCS_LCTL_FLV_LINK_UP |   /* Force link up */
+		       E1000_PCS_LCTL_FSV_1000 |      /* Force 1000    */
+		       E1000_PCS_LCTL_FDV_FULL |      /* SerDes Full duplex */
+		       E1000_PCS_LCTL_FSD |           /* Force Speed */
+		       E1000_PCS_LCTL_FORCE_LINK;     /* Force Link */
+		E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg);
+
+		return 0;
+	}
+
+	return igb_set_phy_loopback(adapter);
+}
+
+static void igb_loopback_cleanup(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+	u16 phy_reg;
+
+        if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
+	    (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
+	    (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
+            (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) {
+		u32 reg;
+
+		/* Disable near end loopback on DH89xxCC */
+		reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL);
+                reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK ) |
+                        E1000_MPHY_PCS_CLK_REG_OFFSET;
+	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg);
+
+		reg = E1000_READ_REG(hw, E1000_MPHY_DATA);
+	reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
+	E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg);
+	}
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+
+	hw->mac.autoneg = TRUE;
+	e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg);
+	if (phy_reg & MII_CR_LOOPBACK) {
+		phy_reg &= ~MII_CR_LOOPBACK;
+		if (hw->phy.type == I210_I_PHY_ID)
+			e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+		e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg);
+		e1000_phy_commit(hw);
+	}
+}
+static void igb_create_lbtest_frame(struct sk_buff *skb,
+				    unsigned int frame_size)
+{
+	memset(skb->data, 0xFF, frame_size);
+	frame_size /= 2;
+	memset(&skb->data[frame_size], 0xAA, frame_size - 1);
+	memset(&skb->data[frame_size + 10], 0xBE, 1);
+	memset(&skb->data[frame_size + 12], 0xAF, 1);
+}
+
+static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
+				  unsigned int frame_size)
+{
+	unsigned char *data;
+	bool match = true;
+
+	frame_size >>= 1;
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	data = rx_buffer->skb->data;
+#else
+	data = kmap(rx_buffer->page);
+#endif
+
+	if (data[3] != 0xFF ||
+	    data[frame_size + 10] != 0xBE ||
+	    data[frame_size + 12] != 0xAF)
+		match = false;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	kunmap(rx_buffer->page);
+
+#endif
+	return match;
+}
+
+static u16 igb_clean_test_rings(struct igb_ring *rx_ring,
+                                struct igb_ring *tx_ring,
+                                unsigned int size)
+{
+	union e1000_adv_rx_desc *rx_desc;
+	struct igb_rx_buffer *rx_buffer_info;
+	struct igb_tx_buffer *tx_buffer_info;
+	u16 rx_ntc, tx_ntc, count = 0;
+
+	/* initialize next to clean and descriptor values */
+	rx_ntc = rx_ring->next_to_clean;
+	tx_ntc = tx_ring->next_to_clean;
+	rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
+
+	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
+		/* check rx buffer */
+		rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
+
+		/* sync Rx buffer for CPU read */
+		dma_sync_single_for_cpu(rx_ring->dev,
+					rx_buffer_info->dma,
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+					IGB_RX_HDR_LEN,
+#else
+					IGB_RX_BUFSZ,
+#endif
+					DMA_FROM_DEVICE);
+
+		/* verify contents of skb */
+		if (igb_check_lbtest_frame(rx_buffer_info, size))
+			count++;
+
+		/* sync Rx buffer for device write */
+		dma_sync_single_for_device(rx_ring->dev,
+					   rx_buffer_info->dma,
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+					   IGB_RX_HDR_LEN,
+#else
+					   IGB_RX_BUFSZ,
+#endif
+					   DMA_FROM_DEVICE);
+
+		/* unmap buffer on tx side */
+		tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
+		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+
+		/* increment rx/tx next to clean counters */
+		rx_ntc++;
+		if (rx_ntc == rx_ring->count)
+			rx_ntc = 0;
+		tx_ntc++;
+		if (tx_ntc == tx_ring->count)
+			tx_ntc = 0;
+
+		/* fetch next descriptor */
+		rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
+	}
+
+	/* re-map buffers to ring, store next to clean values */
+	igb_alloc_rx_buffers(rx_ring, count);
+	rx_ring->next_to_clean = rx_ntc;
+	tx_ring->next_to_clean = tx_ntc;
+
+	return count;
+}
+
+static int igb_run_loopback_test(struct igb_adapter *adapter)
+{
+	struct igb_ring *tx_ring = &adapter->test_tx_ring;
+	struct igb_ring *rx_ring = &adapter->test_rx_ring;
+	u16 i, j, lc, good_cnt;
+	int ret_val = 0;
+	unsigned int size = IGB_RX_HDR_LEN;
+	netdev_tx_t tx_ret_val;
+	struct sk_buff *skb;
+
+	/* allocate test skb */
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb)
+		return 11;
+
+	/* place data into test skb */
+	igb_create_lbtest_frame(skb, size);
+	skb_put(skb, size);
+
+	/*
+	 * Calculate the loop count based on the largest descriptor ring
+	 * The idea is to wrap the largest ring a number of times using 64
+	 * send/receive pairs during each loop
+	 */
+
+	if (rx_ring->count <= tx_ring->count)
+		lc = ((tx_ring->count / 64) * 2) + 1;
+	else
+		lc = ((rx_ring->count / 64) * 2) + 1;
+
+	for (j = 0; j <= lc; j++) { /* loop count loop */
+		/* reset count of good packets */
+		good_cnt = 0;
+
+		/* place 64 packets on the transmit queue*/
+		for (i = 0; i < 64; i++) {
+			skb_get(skb);
+			tx_ret_val = igb_xmit_frame_ring(skb, tx_ring);
+			if (tx_ret_val == NETDEV_TX_OK)
+				good_cnt++;
+		}
+
+		if (good_cnt != 64) {
+			ret_val = 12;
+			break;
+		}
+
+		/* allow 200 milliseconds for packets to go from tx to rx */
+		msleep(200);
+
+		good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size);
+		if (good_cnt != 64) {
+			ret_val = 13;
+			break;
+		}
+	} /* end loop count loop */
+
+	/* free the original skb */
+	kfree_skb(skb);
+
+	return ret_val;
+}
+
+static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
+{
+	/* PHY loopback cannot be performed if SoL/IDER
+	 * sessions are active */
+	if (e1000_check_reset_block(&adapter->hw)) {
+		dev_err(pci_dev_to_dev(adapter->pdev),
+			"Cannot do PHY loopback test "
+			"when SoL/IDER is active.\n");
+		*data = 0;
+		goto out;
+	}
+	if (adapter->hw.mac.type == e1000_i354) {
+		dev_info(&adapter->pdev->dev,
+			"Loopback test not supported on i354.\n");
+		*data = 0;
+		goto out;
+	}
+	*data = igb_setup_desc_rings(adapter);
+	if (*data)
+		goto out;
+	*data = igb_setup_loopback_test(adapter);
+	if (*data)
+		goto err_loopback;
+	*data = igb_run_loopback_test(adapter);
+
+	igb_loopback_cleanup(adapter);
+
+err_loopback:
+	igb_free_desc_rings(adapter);
+out:
+	return *data;
+}
+
+static int igb_link_test(struct igb_adapter *adapter, u64 *data)
+{
+	u32 link;
+	int i, time;
+
+	*data = 0;
+	time = 0;
+	if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
+		int i = 0;
+		adapter->hw.mac.serdes_has_link = FALSE;
+
+		/* On some blade server designs, link establishment
+		 * could take as long as 2-3 minutes */
+		do {
+			e1000_check_for_link(&adapter->hw);
+			if (adapter->hw.mac.serdes_has_link)
+				goto out;
+			msleep(20);
+		} while (i++ < 3750);
+
+		*data = 1;
+	} else {
+		for (i=0; i < IGB_MAX_LINK_TRIES; i++) {
+		link = igb_has_link(adapter);
+			if (link)
+				goto out;
+			else {
+				time++;
+				msleep(1000);
+			}
+		}
+		if (!link)
+			*data = 1;
+	}
+	out:
+		return *data;
+}
+
+static void igb_diag_test(struct net_device *netdev,
+			  struct ethtool_test *eth_test, u64 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	u16 autoneg_advertised;
+	u8 forced_speed_duplex, autoneg;
+	bool if_running = netif_running(netdev);
+
+	set_bit(__IGB_TESTING, &adapter->state);
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+
+		/* save speed, duplex, autoneg settings */
+		autoneg_advertised = adapter->hw.phy.autoneg_advertised;
+		forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
+		autoneg = adapter->hw.mac.autoneg;
+
+		dev_info(pci_dev_to_dev(adapter->pdev), "offline testing starting\n");
+
+		/* power up link for link test */
+		igb_power_up_link(adapter);
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result */
+		if (igb_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			/* indicate we're in test mode */
+			dev_close(netdev);
+		else
+			igb_reset(adapter);
+
+		if (igb_reg_test(adapter, &data[0]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igb_reset(adapter);
+		if (igb_eeprom_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igb_reset(adapter);
+		if (igb_intr_test(adapter, &data[2]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igb_reset(adapter);
+
+		/* power up link for loopback test */
+		igb_power_up_link(adapter);
+
+		if (igb_loopback_test(adapter, &data[3]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* restore speed, duplex, autoneg settings */
+		adapter->hw.phy.autoneg_advertised = autoneg_advertised;
+		adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
+		adapter->hw.mac.autoneg = autoneg;
+
+		/* force this routine to wait until autoneg complete/timeout */
+		adapter->hw.phy.autoneg_wait_to_complete = TRUE;
+		igb_reset(adapter);
+		adapter->hw.phy.autoneg_wait_to_complete = FALSE;
+
+		clear_bit(__IGB_TESTING, &adapter->state);
+		if (if_running)
+			dev_open(netdev);
+	} else {
+		dev_info(pci_dev_to_dev(adapter->pdev), "online testing starting\n");
+
+		/* PHY is powered down when interface is down */
+		if (if_running && igb_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+		else
+			data[4] = 0;
+
+		/* Online tests aren't run; pass by default */
+		data[0] = 0;
+		data[1] = 0;
+		data[2] = 0;
+		data[3] = 0;
+
+		clear_bit(__IGB_TESTING, &adapter->state);
+	}
+	msleep_interruptible(4 * 1000);
+}
+
+static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST |
+	                 WAKE_BCAST | WAKE_MAGIC |
+	                 WAKE_PHY;
+	wol->wolopts = 0;
+
+	if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
+		return;
+
+	/* apply any specific unsupported masks here */
+	switch (adapter->hw.device_id) {
+	default:
+		break;
+	}
+
+	if (adapter->wol & E1000_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & E1000_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & E1000_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & E1000_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+	if (adapter->wol & E1000_WUFC_LNKC)
+		wol->wolopts |= WAKE_PHY;
+}
+
+static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE))
+		return -EOPNOTSUPP;
+
+	if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	/* these settings will always override what we currently have */
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= E1000_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= E1000_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= E1000_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= E1000_WUFC_MAG;
+	if (wol->wolopts & WAKE_PHY)
+		adapter->wol |= E1000_WUFC_LNKC;
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+/* bit defines for adapter->led_status */
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+static int igb_set_phys_id(struct net_device *netdev,
+                           enum ethtool_phys_id_state state)
+{
+        struct igb_adapter *adapter = netdev_priv(netdev);
+        struct e1000_hw *hw = &adapter->hw;
+
+        switch (state) {
+        case ETHTOOL_ID_ACTIVE:
+		e1000_blink_led(hw);
+                return 2;
+        case ETHTOOL_ID_ON:
+                e1000_led_on(hw);
+                break;
+        case ETHTOOL_ID_OFF:
+                e1000_led_off(hw);
+                break;
+        case ETHTOOL_ID_INACTIVE:
+		e1000_led_off(hw);
+		e1000_cleanup_led(hw);
+                break;
+        }
+
+        return 0;
+}
+#else
+static int igb_phys_id(struct net_device *netdev, u32 data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long timeout;
+
+	timeout = data * 1000;
+
+	/*
+	 *  msleep_interruptable only accepts unsigned int so we are limited
+	 * in how long a duration we can wait
+	 */
+	if (!timeout || timeout > UINT_MAX)
+		timeout = UINT_MAX;
+
+	e1000_blink_led(hw);
+	msleep_interruptible(timeout);
+
+	e1000_led_off(hw);
+	e1000_cleanup_led(hw);
+
+	return 0;
+}
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+
+static int igb_set_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
+	    ((ec->rx_coalesce_usecs > 3) &&
+	     (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
+	    (ec->rx_coalesce_usecs == 2))
+	    {
+	    	printk("set_coalesce:invalid parameter..");
+		return -EINVAL;
+	}
+
+	if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
+	    ((ec->tx_coalesce_usecs > 3) &&
+	     (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
+	    (ec->tx_coalesce_usecs == 2))
+		return -EINVAL;
+
+	if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
+		return -EINVAL;
+
+	if (ec->tx_max_coalesced_frames_irq)
+		adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
+	/* If ITR is disabled, disable DMAC */
+	if (ec->rx_coalesce_usecs == 0) {
+		adapter->dmac = IGB_DMAC_DISABLE;
+	}
+
+	/* convert to rate of irq's per second */
+	if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+
+	/* convert to rate of irq's per second */
+	if (adapter->flags & IGB_FLAG_QUEUE_PAIRS)
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+	else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igb_q_vector *q_vector = adapter->q_vector[i];
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		if (q_vector->rx.ring)
+			q_vector->itr_val = adapter->rx_itr_setting;
+		else
+			q_vector->itr_val = adapter->tx_itr_setting;
+		if (q_vector->itr_val && q_vector->itr_val <= 3)
+			q_vector->itr_val = IGB_START_ITR;
+		q_vector->set_itr = 1;
+	}
+
+	return 0;
+}
+
+static int igb_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->rx_itr_setting <= 3)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+
+	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) {
+		if (adapter->tx_itr_setting <= 3)
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+		else
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+	}
+
+	return 0;
+}
+
+static int igb_nway_reset(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	if (netif_running(netdev))
+		igb_reinit_locked(adapter);
+	return 0;
+}
+
+#ifdef HAVE_ETHTOOL_GET_SSET_COUNT
+static int igb_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return IGB_STATS_LEN;
+	case ETH_SS_TEST:
+		return IGB_TEST_LEN;
+	default:
+		return -ENOTSUPP;
+	}
+}
+#else
+static int igb_get_stats_count(struct net_device *netdev)
+{
+	return IGB_STATS_LEN;
+}
+
+static int igb_diag_test_count(struct net_device *netdev)
+{
+	return IGB_TEST_LEN;
+}
+#endif
+
+static void igb_get_ethtool_stats(struct net_device *netdev,
+				  struct ethtool_stats *stats, u64 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+	struct net_device_stats *net_stats = &netdev->stats;
+#else
+	struct net_device_stats *net_stats = &adapter->net_stats;
+#endif
+	u64 *queue_stat;
+	int i, j, k;
+	char *p;
+
+	igb_update_stats(adapter);
+
+	for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
+		p = (char *)adapter + igb_gstrings_stats[i].stat_offset;
+		data[i] = (igb_gstrings_stats[i].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) {
+		p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset;
+		data[i] = (igb_gstrings_net_stats[j].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		queue_stat = (u64 *)&adapter->tx_ring[j]->tx_stats;
+		for (k = 0; k < IGB_TX_QUEUE_STATS_LEN; k++, i++)
+			data[i] = queue_stat[k];
+	}
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		queue_stat = (u64 *)&adapter->rx_ring[j]->rx_stats;
+		for (k = 0; k < IGB_RX_QUEUE_STATS_LEN; k++, i++)
+			data[i] = queue_stat[k];
+	}
+}
+
+static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, *igb_gstrings_test,
+			IGB_TEST_LEN*ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, igb_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) {
+			memcpy(p, igb_gstrings_net_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			sprintf(p, "tx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "tx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "tx_queue_%u_restart", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			sprintf(p, "rx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_drops", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_csum_err", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_alloc_failed", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_ipv4_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_ipv4e_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_ipv6_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_ipv6e_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_tcp_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_udp_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_sctp_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_nfs_packets", i);
+			p += ETH_GSTRING_LEN;
+		}
+/*		BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	}
+}
+
+#ifdef HAVE_ETHTOOL_GET_TS_INFO
+static int igb_get_ts_info(struct net_device *dev,
+			   struct ethtool_ts_info *info)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+
+	switch (adapter->hw.mac.type) {
+#ifdef HAVE_PTP_1588_CLOCK
+	case e1000_82575:
+		info->so_timestamping =
+			SOF_TIMESTAMPING_TX_SOFTWARE |
+			SOF_TIMESTAMPING_RX_SOFTWARE |
+			SOF_TIMESTAMPING_SOFTWARE;
+		return 0;
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		info->so_timestamping =
+			SOF_TIMESTAMPING_TX_SOFTWARE |
+			SOF_TIMESTAMPING_RX_SOFTWARE |
+			SOF_TIMESTAMPING_SOFTWARE |
+			SOF_TIMESTAMPING_TX_HARDWARE |
+			SOF_TIMESTAMPING_RX_HARDWARE |
+			SOF_TIMESTAMPING_RAW_HARDWARE;
+
+		if (adapter->ptp_clock)
+			info->phc_index = ptp_clock_index(adapter->ptp_clock);
+		else
+			info->phc_index = -1;
+
+		info->tx_types =
+			(1 << HWTSTAMP_TX_OFF) |
+			(1 << HWTSTAMP_TX_ON);
+
+		info->rx_filters = 1 << HWTSTAMP_FILTER_NONE;
+
+		/* 82576 does not support timestamping all packets. */
+		if (adapter->hw.mac.type >= e1000_82580)
+			info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL;
+		else
+			info->rx_filters |=
+				(1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+				(1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+				(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+		return 0;
+#endif /* HAVE_PTP_1588_CLOCK */
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+#endif /* HAVE_ETHTOOL_GET_TS_INFO */
+
+#ifdef CONFIG_PM_RUNTIME
+static int igb_ethtool_begin(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_get_sync(&adapter->pdev->dev);
+
+	return 0;
+}
+
+static void igb_ethtool_complete(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_put(&adapter->pdev->dev);
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+#ifndef HAVE_NDO_SET_FEATURES
+static u32 igb_get_rx_csum(struct net_device *netdev)
+{
+	return !!(netdev->features & NETIF_F_RXCSUM);
+}
+
+static int igb_set_rx_csum(struct net_device *netdev, u32 data)
+{
+	const u32 feature_list = NETIF_F_RXCSUM;
+
+	if (data)
+		netdev->features |= feature_list;
+	else
+		netdev->features &= ~feature_list;
+
+	return 0;
+}
+
+static int igb_set_tx_csum(struct net_device *netdev, u32 data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+#ifdef NETIF_F_IPV6_CSUM
+	u32 feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+#else
+	u32 feature_list = NETIF_F_IP_CSUM;
+#endif
+
+	if (adapter->hw.mac.type >= e1000_82576)
+		feature_list |= NETIF_F_SCTP_CSUM;
+
+	if (data)
+		netdev->features |= feature_list;
+	else
+		netdev->features &= ~feature_list;
+
+	return 0;
+}
+
+#ifdef NETIF_F_TSO
+static int igb_set_tso(struct net_device *netdev, u32 data)
+{
+#ifdef NETIF_F_TSO6
+	const u32 feature_list = NETIF_F_TSO | NETIF_F_TSO6;
+#else
+	const u32 feature_list = NETIF_F_TSO;
+#endif
+
+	if (data)
+		netdev->features |= feature_list;
+	else
+		netdev->features &= ~feature_list;
+
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+	if (!data) {
+		struct igb_adapter *adapter = netdev_priv(netdev);
+		struct net_device *v_netdev;
+		int i;
+
+		/* disable TSO on all VLANs if they're present */
+		if (!adapter->vlgrp)
+			goto tso_out;
+
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			v_netdev = vlan_group_get_device(adapter->vlgrp, i);
+			if (!v_netdev)
+				continue;
+
+			v_netdev->features &= ~feature_list;
+			vlan_group_set_device(adapter->vlgrp, i, v_netdev);
+		}
+	}
+
+tso_out:
+
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+	return 0;
+}
+
+#endif /* NETIF_F_TSO */
+#ifdef ETHTOOL_GFLAGS
+static int igb_set_flags(struct net_device *netdev, u32 data)
+{
+	u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN |
+			      ETH_FLAG_RXHASH;
+#ifndef HAVE_VLAN_RX_REGISTER
+	u32 changed = netdev->features ^ data;
+#endif
+	int rc;
+#ifndef IGB_NO_LRO
+
+	supported_flags |= ETH_FLAG_LRO;
+#endif
+	/*
+	 * Since there is no support for separate tx vlan accel
+	 * enabled make sure tx flag is cleared if rx is.
+	 */
+	if (!(data & ETH_FLAG_RXVLAN))
+		data &= ~ETH_FLAG_TXVLAN;
+
+	rc = ethtool_op_set_flags(netdev, data, supported_flags);
+	if (rc)
+		return rc;
+#ifndef HAVE_VLAN_RX_REGISTER
+
+	if (changed & ETH_FLAG_RXVLAN)
+		igb_vlan_mode(netdev, data);
+#endif
+
+	return 0;
+}
+
+#endif /* ETHTOOL_GFLAGS */
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_SADV_COAL
+static int igb_set_adv_coal(struct net_device *netdev, struct ethtool_value *edata)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	switch (edata->data) {
+	case IGB_DMAC_DISABLE:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_MIN:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_500:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_EN_DEFAULT:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_2000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_3000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_4000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_5000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_6000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_7000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_8000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_9000:
+		adapter->dmac = edata->data;
+		break;
+	case IGB_DMAC_MAX:
+		adapter->dmac = edata->data;
+		break;
+	default:
+		adapter->dmac = IGB_DMAC_DISABLE;
+		printk("set_dmac: invalid setting, setting DMAC to %d\n",
+			adapter->dmac);
+	}
+	printk("%s: setting DMAC to %d\n", netdev->name, adapter->dmac);
+	return 0;
+}
+#endif /* ETHTOOL_SADV_COAL */
+#ifdef ETHTOOL_GADV_COAL
+static void igb_get_dmac(struct net_device *netdev,
+			    struct ethtool_value *edata)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	edata->data = adapter->dmac;
+
+	return;
+}
+#endif
+
+#ifdef ETHTOOL_GEEE
+static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ret_val;
+	u16 phy_data;
+
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
+		return -EOPNOTSUPP;
+
+	edata->supported = (SUPPORTED_1000baseT_Full |
+			    SUPPORTED_100baseT_Full);
+
+	if (!hw->dev_spec._82575.eee_disable)
+		edata->advertised =
+			mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+
+	/* The IPCNFG and EEER registers are not supported on I354. */
+	if (hw->mac.type == e1000_i354) {
+		e1000_get_eee_status_i354(hw, (bool *)&edata->eee_active);
+	} else {
+		u32 eeer;
+
+		eeer = E1000_READ_REG(hw, E1000_EEER);
+
+		/* EEE status on negotiated link */
+		if (eeer & E1000_EEER_EEE_NEG)
+			edata->eee_active = true;
+
+		if (eeer & E1000_EEER_TX_LPI_EN)
+			edata->tx_lpi_enabled = true;
+	}
+
+	/* EEE Link Partner Advertised */
+	switch (hw->mac.type) {
+	case e1000_i350:
+		ret_val = e1000_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350,
+					     &phy_data);
+		if (ret_val)
+			return -ENODATA;
+
+		edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+
+		break;
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210,
+					       E1000_EEE_LP_ADV_DEV_I210,
+					       &phy_data);
+		if (ret_val)
+			return -ENODATA;
+
+		edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+
+		break;
+	default:
+		break;
+	}
+
+	edata->eee_enabled = !hw->dev_spec._82575.eee_disable;
+
+	if ((hw->mac.type == e1000_i354) &&
+	    (edata->eee_enabled))
+		edata->tx_lpi_enabled = true;
+
+	/*
+	 * report correct negotiated EEE status for devices that
+	 * wrongly report EEE at half-duplex
+	 */
+	if (adapter->link_duplex == HALF_DUPLEX) {
+		edata->eee_enabled = false;
+		edata->eee_active = false;
+		edata->tx_lpi_enabled = false;
+		edata->advertised &= ~edata->advertised;
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef ETHTOOL_SEEE
+static int igb_set_eee(struct net_device *netdev,
+		       struct ethtool_eee *edata)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct ethtool_eee eee_curr;
+	s32 ret_val;
+
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
+		return -EOPNOTSUPP;
+
+	ret_val = igb_get_eee(netdev, &eee_curr);
+	if (ret_val)
+		return ret_val;
+
+	if (eee_curr.eee_enabled) {
+		if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
+			dev_err(pci_dev_to_dev(adapter->pdev),
+				"Setting EEE tx-lpi is not supported\n");
+			return -EINVAL;
+		}
+
+		/* Tx LPI time is not implemented currently */
+		if (edata->tx_lpi_timer) {
+			dev_err(pci_dev_to_dev(adapter->pdev),
+				"Setting EEE Tx LPI timer is not supported\n");
+			return -EINVAL;
+		}
+
+		if (edata->advertised &
+		    ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) {
+			dev_err(pci_dev_to_dev(adapter->pdev),
+				"EEE Advertisement supports only 100Tx and or 100T full duplex\n");
+			return -EINVAL;
+		}
+
+	} else if (!edata->eee_enabled) {
+		dev_err(pci_dev_to_dev(adapter->pdev),
+			"Setting EEE options is not supported with EEE disabled\n");
+			return -EINVAL;
+		}
+
+	adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+
+	if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) {
+		hw->dev_spec._82575.eee_disable = !edata->eee_enabled;
+
+		/* reset link */
+		if (netif_running(netdev))
+			igb_reinit_locked(adapter);
+		else
+			igb_reset(adapter);
+	}
+
+	return 0;
+}
+#endif /* ETHTOOL_SEEE */
+
+#ifdef ETHTOOL_GRXRINGS
+static int igb_get_rss_hash_opts(struct igb_adapter *adapter,
+				 struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* Report default options for RSS on igb */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case UDP_V4_FLOW:
+		if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case UDP_V6_FLOW:
+		if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+			   void *rule_locs)
+#else
+			   u32 *rule_locs)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		ret = igb_get_rss_hash_opts(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \
+		       IGB_FLAG_RSS_FIELD_IPV6_UDP)
+static int igb_set_rss_hash_opt(struct igb_adapter *adapter,
+				struct ethtool_rxnfc *nfc)
+{
+	u32 flags = adapter->flags;
+
+	/*
+	 * RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags != adapter->flags) {
+		struct e1000_hw *hw = &adapter->hw;
+		u32 mrqc = E1000_READ_REG(hw, E1000_MRQC);
+
+		if ((flags & UDP_RSS_FLAGS) &&
+		    !(adapter->flags & UDP_RSS_FLAGS))
+			DPRINTK(DRV, WARNING,
+				"enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+		adapter->flags = flags;
+
+		/* Perform hash on these packet types */
+		mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
+			E1000_MRQC_RSS_FIELD_IPV4_TCP |
+			E1000_MRQC_RSS_FIELD_IPV6 |
+			E1000_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP |
+			  E1000_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+			mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+			mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
+
+		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
+static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = igb_set_rss_hash_opt(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+#endif /* ETHTOOL_GRXRINGS */
+
+static const struct ethtool_ops igb_ethtool_ops = {
+	.get_settings           = igb_get_settings,
+	.set_settings           = igb_set_settings,
+	.get_drvinfo            = igb_get_drvinfo,
+	.get_regs_len           = igb_get_regs_len,
+	.get_regs               = igb_get_regs,
+	.get_wol                = igb_get_wol,
+	.set_wol                = igb_set_wol,
+	.get_msglevel           = igb_get_msglevel,
+	.set_msglevel           = igb_set_msglevel,
+	.nway_reset             = igb_nway_reset,
+	.get_link               = igb_get_link,
+	.get_eeprom_len         = igb_get_eeprom_len,
+	.get_eeprom             = igb_get_eeprom,
+	.set_eeprom             = igb_set_eeprom,
+	.get_ringparam          = igb_get_ringparam,
+	.set_ringparam          = igb_set_ringparam,
+	.get_pauseparam         = igb_get_pauseparam,
+	.set_pauseparam         = igb_set_pauseparam,
+	.self_test              = igb_diag_test,
+	.get_strings            = igb_get_strings,
+#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+	.set_phys_id            = igb_set_phys_id,
+#else
+	.phys_id                = igb_phys_id,
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#ifdef HAVE_ETHTOOL_GET_SSET_COUNT
+	.get_sset_count         = igb_get_sset_count,
+#else
+	.get_stats_count        = igb_get_stats_count,
+	.self_test_count        = igb_diag_test_count,
+#endif
+	.get_ethtool_stats      = igb_get_ethtool_stats,
+#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
+	.get_perm_addr          = ethtool_op_get_perm_addr,
+#endif
+	.get_coalesce           = igb_get_coalesce,
+	.set_coalesce           = igb_set_coalesce,
+#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#ifdef HAVE_ETHTOOL_GET_TS_INFO
+	.get_ts_info            = igb_get_ts_info,
+#endif /* HAVE_ETHTOOL_GET_TS_INFO */
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#ifdef CONFIG_PM_RUNTIME
+	.begin			= igb_ethtool_begin,
+	.complete		= igb_ethtool_complete,
+#endif /* CONFIG_PM_RUNTIME */
+#ifndef HAVE_NDO_SET_FEATURES
+	.get_rx_csum            = igb_get_rx_csum,
+	.set_rx_csum            = igb_set_rx_csum,
+	.get_tx_csum            = ethtool_op_get_tx_csum,
+	.set_tx_csum            = igb_set_tx_csum,
+	.get_sg                 = ethtool_op_get_sg,
+	.set_sg                 = ethtool_op_set_sg,
+#ifdef NETIF_F_TSO
+	.get_tso                = ethtool_op_get_tso,
+	.set_tso                = igb_set_tso,
+#endif
+#ifdef ETHTOOL_GFLAGS
+	.get_flags              = ethtool_op_get_flags,
+	.set_flags              = igb_set_flags,
+#endif /* ETHTOOL_GFLAGS */
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_GADV_COAL
+	.get_advcoal		= igb_get_adv_coal,
+	.set_advcoal		= igb_set_dmac_coal,
+#endif /* ETHTOOL_GADV_COAL */
+#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#ifdef ETHTOOL_GEEE
+	.get_eee		= igb_get_eee,
+#endif
+#ifdef ETHTOOL_SEEE
+	.set_eee		= igb_set_eee,
+#endif
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#ifdef ETHTOOL_GRXRINGS
+	.get_rxnfc		= igb_get_rxnfc,
+	.set_rxnfc		= igb_set_rxnfc,
+#endif
+};
+
+#ifdef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+static const struct ethtool_ops_ext igb_ethtool_ops_ext = {
+	.size		= sizeof(struct ethtool_ops_ext),
+	.get_ts_info	= igb_get_ts_info,
+	.set_phys_id	= igb_set_phys_id,
+	.get_eee	= igb_get_eee,
+	.set_eee	= igb_set_eee,
+};
+
+void igb_set_ethtool_ops(struct net_device *netdev)
+{
+	SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops);
+	set_ethtool_ops_ext(netdev, &igb_ethtool_ops_ext);
+}
+#else
+void igb_set_ethtool_ops(struct net_device *netdev)
+{
+	/* have to "undeclare" const on this struct to remove warnings */
+	SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igb_ethtool_ops);
+}
+#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */
+#endif	/* SIOCETHTOOL */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
new file mode 100644
index 00000000..07a1ae07
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
@@ -0,0 +1,260 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
+#include "e1000_82575.h"
+#include "e1000_hw.h"
+#ifdef IGB_HWMON
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/hwmon.h>
+#include <linux/pci.h>
+
+#ifdef HAVE_I2C_SUPPORT
+static struct i2c_board_info i350_sensor_info = {
+	I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
+};
+#endif /* HAVE_I2C_SUPPORT */
+
+/* hwmon callback functions */
+static ssize_t igb_hwmon_show_location(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	return sprintf(buf, "loc%u\n",
+		       igb_attr->sensor->location);
+}
+
+static ssize_t igb_hwmon_show_temp(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	unsigned int value;
+
+	/* reset the temp field */
+	igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw);
+
+	value = igb_attr->sensor->temp;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t igb_hwmon_show_cautionthresh(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	unsigned int value = igb_attr->sensor->caution_thresh;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t igb_hwmon_show_maxopthresh(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	unsigned int value = igb_attr->sensor->max_op_thresh;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
+ * @ adapter: pointer to the adapter structure
+ * @ offset: offset in the eeprom sensor data table
+ * @ type: type of sensor data to display
+ *
+ * For each file we want in hwmon's sysfs interface we need a device_attribute
+ * This is included in our hwmon_attr struct that contains the references to
+ * the data structures we need to get the data to display.
+ */
+static int igb_add_hwmon_attr(struct igb_adapter *adapter,
+				unsigned int offset, int type) {
+	int rc;
+	unsigned int n_attr;
+	struct hwmon_attr *igb_attr;
+
+	n_attr = adapter->igb_hwmon_buff.n_hwmon;
+	igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr];
+
+	switch (type) {
+	case IGB_HWMON_TYPE_LOC:
+		igb_attr->dev_attr.show = igb_hwmon_show_location;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_label", offset);
+		break;
+	case IGB_HWMON_TYPE_TEMP:
+		igb_attr->dev_attr.show = igb_hwmon_show_temp;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_input", offset);
+		break;
+	case IGB_HWMON_TYPE_CAUTION:
+		igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_max", offset);
+		break;
+	case IGB_HWMON_TYPE_MAX:
+		igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_crit", offset);
+		break;
+	default:
+		rc = -EPERM;
+		return rc;
+	}
+
+	/* These always the same regardless of type */
+	igb_attr->sensor =
+		&adapter->hw.mac.thermal_sensor_data.sensor[offset];
+	igb_attr->hw = &adapter->hw;
+	igb_attr->dev_attr.store = NULL;
+	igb_attr->dev_attr.attr.mode = S_IRUGO;
+	igb_attr->dev_attr.attr.name = igb_attr->name;
+	sysfs_attr_init(&igb_attr->dev_attr.attr);
+	rc = device_create_file(&adapter->pdev->dev,
+				&igb_attr->dev_attr);
+	if (rc == 0)
+		++adapter->igb_hwmon_buff.n_hwmon;
+
+	return rc;
+}
+
+static void igb_sysfs_del_adapter(struct igb_adapter *adapter)
+{
+	int i;
+
+	if (adapter == NULL)
+		return;
+
+	for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) {
+		device_remove_file(&adapter->pdev->dev,
+			   &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr);
+	}
+
+	kfree(adapter->igb_hwmon_buff.hwmon_list);
+
+	if (adapter->igb_hwmon_buff.device)
+		hwmon_device_unregister(adapter->igb_hwmon_buff.device);
+}
+
+/* called from igb_main.c */
+void igb_sysfs_exit(struct igb_adapter *adapter)
+{
+	igb_sysfs_del_adapter(adapter);
+}
+
+/* called from igb_main.c */
+int igb_sysfs_init(struct igb_adapter *adapter)
+{
+	struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff;
+	unsigned int i;
+	int n_attrs;
+	int rc = 0;
+#ifdef HAVE_I2C_SUPPORT
+	struct i2c_client *client = NULL;
+#endif /* HAVE_I2C_SUPPORT */
+
+	/* If this method isn't defined we don't support thermals */
+	if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
+		goto exit;
+
+	/* Don't create thermal hwmon interface if no sensors present */
+	rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw));
+		if (rc)
+			goto exit;
+#ifdef HAVE_I2C_SUPPORT
+	/* init i2c_client */
+	client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info);
+	if (client == NULL) {
+		dev_info(&adapter->pdev->dev,
+			"Failed to create new i2c device..\n");
+		goto exit;
+	}
+	adapter->i2c_client = client;
+#endif /* HAVE_I2C_SUPPORT */
+
+	/* Allocation space for max attributes
+	 * max num sensors * values (loc, temp, max, caution)
+	 */
+	n_attrs = E1000_MAX_SENSORS * 4;
+	igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr),
+					  GFP_KERNEL);
+	if (!igb_hwmon->hwmon_list) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev);
+	if (IS_ERR(igb_hwmon->device)) {
+		rc = PTR_ERR(igb_hwmon->device);
+		goto err;
+	}
+
+	for (i = 0; i < E1000_MAX_SENSORS; i++) {
+
+		/* Only create hwmon sysfs entries for sensors that have
+		 * meaningful data.
+		 */
+		if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
+			continue;
+
+		/* Bail if any hwmon attr struct fails to initialize */
+		rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION);
+		rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC);
+		rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP);
+		rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX);
+		if (rc)
+			goto err;
+	}
+
+	goto exit;
+
+err:
+	igb_sysfs_del_adapter(adapter);
+exit:
+	return rc;
+}
+#endif /* IGB_HWMON */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
new file mode 100644
index 00000000..96acec58
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -0,0 +1,10291 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#ifdef NETIF_F_TSO
+#include <net/checksum.h>
+#ifdef NETIF_F_TSO6
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
+#endif
+#endif
+#ifdef SIOCGMIIPHY
+#include <linux/mii.h>
+#endif
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+#include <linux/if_vlan.h>
+#ifdef CONFIG_PM_RUNTIME
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_PM_RUNTIME */
+
+#include <linux/if_bridge.h>
+#include "igb.h"
+#include "igb_vmdq.h"
+
+#include <linux/uio_driver.h>
+
+#if defined(DEBUG) || defined (DEBUG_DUMP) || defined (DEBUG_ICR) || defined(DEBUG_ITR)
+#define DRV_DEBUG "_debug"
+#else
+#define DRV_DEBUG
+#endif
+#define DRV_HW_PERF
+#define VERSION_SUFFIX
+
+#define MAJ 5
+#define MIN 0
+#define BUILD 6
+#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF
+
+char igb_driver_name[] = "igb";
+char igb_driver_version[] = DRV_VERSION;
+static const char igb_driver_string[] =
+                                "Intel(R) Gigabit Ethernet Network Driver";
+static const char igb_copyright[] =
+				"Copyright (c) 2007-2013 Intel Corporation.";
+
+static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) },
+	/* required last entry */
+	{0, }
+};
+
+//MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
+static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__));
+void igb_reset(struct igb_adapter *);
+static int igb_setup_all_tx_resources(struct igb_adapter *);
+static int igb_setup_all_rx_resources(struct igb_adapter *);
+static void igb_free_all_tx_resources(struct igb_adapter *);
+static void igb_free_all_rx_resources(struct igb_adapter *);
+static void igb_setup_mrqc(struct igb_adapter *);
+void igb_update_stats(struct igb_adapter *);
+static int igb_probe(struct pci_dev *, const struct pci_device_id *);
+static void __devexit igb_remove(struct pci_dev *pdev);
+static int igb_sw_init(struct igb_adapter *);
+static int igb_open(struct net_device *);
+static int igb_close(struct net_device *);
+static void igb_configure(struct igb_adapter *);
+static void igb_configure_tx(struct igb_adapter *);
+static void igb_configure_rx(struct igb_adapter *);
+static void igb_clean_all_tx_rings(struct igb_adapter *);
+static void igb_clean_all_rx_rings(struct igb_adapter *);
+static void igb_clean_tx_ring(struct igb_ring *);
+static void igb_set_rx_mode(struct net_device *);
+static void igb_update_phy_info(unsigned long);
+static void igb_watchdog(unsigned long);
+static void igb_watchdog_task(struct work_struct *);
+static void igb_dma_err_task(struct work_struct *);
+static void igb_dma_err_timer(unsigned long data);
+static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
+static struct net_device_stats *igb_get_stats(struct net_device *);
+static int igb_change_mtu(struct net_device *, int);
+void igb_full_sync_mac_table(struct igb_adapter *adapter);
+static int igb_set_mac(struct net_device *, void *);
+static void igb_set_uta(struct igb_adapter *adapter);
+static irqreturn_t igb_intr(int irq, void *);
+static irqreturn_t igb_intr_msi(int irq, void *);
+static irqreturn_t igb_msix_other(int irq, void *);
+static irqreturn_t igb_msix_ring(int irq, void *);
+#ifdef IGB_DCA
+static void igb_update_dca(struct igb_q_vector *);
+static void igb_setup_dca(struct igb_adapter *);
+#endif /* IGB_DCA */
+static int igb_poll(struct napi_struct *, int);
+static bool igb_clean_tx_irq(struct igb_q_vector *);
+static bool igb_clean_rx_irq(struct igb_q_vector *, int);
+static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
+static void igb_tx_timeout(struct net_device *);
+static void igb_reset_task(struct work_struct *);
+#ifdef HAVE_VLAN_RX_REGISTER
+static void igb_vlan_mode(struct net_device *, struct vlan_group *);
+#endif
+#ifdef HAVE_VLAN_PROTOCOL
+static int igb_vlan_rx_add_vid(struct net_device *,
+                               __be16 proto, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *,
+                                __be16 proto, u16);
+#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+static int igb_vlan_rx_add_vid(struct net_device *,
+			       __always_unused __be16 proto, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *,
+			        __always_unused __be16 proto, u16);
+#else
+static int igb_vlan_rx_add_vid(struct net_device *, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *, u16);
+#endif
+#else
+static void igb_vlan_rx_add_vid(struct net_device *, u16);
+static void igb_vlan_rx_kill_vid(struct net_device *, u16);
+#endif
+static void igb_restore_vlan(struct igb_adapter *);
+void igb_rar_set(struct igb_adapter *adapter, u32 index);
+static void igb_ping_all_vfs(struct igb_adapter *);
+static void igb_msg_task(struct igb_adapter *);
+static void igb_vmm_control(struct igb_adapter *);
+static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
+static void igb_process_mdd_event(struct igb_adapter *);
+#ifdef IFLA_VF_MAX
+static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac);
+static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+				int vf, u16 vlan, u8 qos);
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
+				bool setting);
+#endif
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
+				 struct ifla_vf_info *ivi);
+static void igb_check_vf_rate_limit(struct igb_adapter *);
+#endif
+static int igb_vf_configure(struct igb_adapter *adapter, int vf);
+#ifdef CONFIG_PM
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_suspend(struct device *dev);
+static int igb_resume(struct device *dev);
+#ifdef CONFIG_PM_RUNTIME
+static int igb_runtime_suspend(struct device *dev);
+static int igb_runtime_resume(struct device *dev);
+static int igb_runtime_idle(struct device *dev);
+#endif /* CONFIG_PM_RUNTIME */
+static const struct dev_pm_ops igb_pm_ops = {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
+        .suspend = igb_suspend,
+        .resume = igb_resume,
+        .freeze = igb_suspend,
+        .thaw = igb_resume,
+        .poweroff = igb_suspend,
+        .restore = igb_resume,
+#ifdef CONFIG_PM_RUNTIME
+        .runtime_suspend = igb_runtime_suspend,
+        .runtime_resume = igb_runtime_resume,
+        .runtime_idle = igb_runtime_idle,
+#endif
+#else /* Linux >= 2.6.34 */
+	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
+#ifdef CONFIG_PM_RUNTIME
+	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
+			igb_runtime_idle)
+#endif /* CONFIG_PM_RUNTIME */
+#endif /* Linux version */
+};
+#else
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state);
+static int igb_resume(struct pci_dev *pdev);
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+#endif /* CONFIG_PM */
+#ifndef USE_REBOOT_NOTIFIER
+static void igb_shutdown(struct pci_dev *);
+#else
+static int igb_notify_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block igb_notifier_reboot = {
+	.notifier_call	= igb_notify_reboot,
+	.next		= NULL,
+	.priority	= 0
+};
+#endif
+#ifdef IGB_DCA
+static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+	.notifier_call	= igb_notify_dca,
+	.next		= NULL,
+	.priority	= 0
+};
+#endif
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* for netdump / net console */
+static void igb_netpoll(struct net_device *);
+#endif
+
+#ifdef HAVE_PCI_ERS
+static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
+		     pci_channel_state_t);
+static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
+static void igb_io_resume(struct pci_dev *);
+
+static struct pci_error_handlers igb_err_handler = {
+	.error_detected = igb_io_error_detected,
+	.slot_reset = igb_io_slot_reset,
+	.resume = igb_io_resume,
+};
+#endif
+
+static void igb_init_fw(struct igb_adapter *adapter);
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
+
+static struct pci_driver igb_driver = {
+	.name     = igb_driver_name,
+	.id_table = igb_pci_tbl,
+	.probe    = igb_probe,
+	.remove   = __devexit_p(igb_remove),
+#ifdef CONFIG_PM
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+	.driver.pm = &igb_pm_ops,
+#else
+	.suspend  = igb_suspend,
+	.resume   = igb_resume,
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+#endif /* CONFIG_PM */
+#ifndef USE_REBOOT_NOTIFIER
+	.shutdown = igb_shutdown,
+#endif
+#ifdef HAVE_PCI_ERS
+	.err_handler = &igb_err_handler
+#endif
+};
+
+//MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
+//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
+//MODULE_LICENSE("GPL");
+//MODULE_VERSION(DRV_VERSION);
+
+static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie;
+	u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
+	u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
+	u32 vfta;
+
+	/*
+	 * if this is the management vlan the only option is to add it in so
+	 * that the management pass through will continue to work
+	 */
+	if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
+	    (vid == mng_cookie->vlan_id))
+		add = TRUE;
+
+	vfta = adapter->shadow_vfta[index];
+
+	if (add)
+		vfta |= mask;
+	else
+		vfta &= ~mask;
+
+	e1000_write_vfta(hw, index, vfta);
+	adapter->shadow_vfta[index] = vfta;
+}
+
+static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
+//module_param(debug, int, 0);
+//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)");
+
+/**
+ * igb_init_module - Driver Registration Routine
+ *
+ * igb_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init igb_init_module(void)
+{
+	int ret;
+
+	printk(KERN_INFO "%s - version %s\n",
+	       igb_driver_string, igb_driver_version);
+
+	printk(KERN_INFO "%s\n", igb_copyright);
+#ifdef IGB_HWMON
+/* only use IGB_PROCFS if IGB_HWMON is not defined */
+#else
+#ifdef IGB_PROCFS
+	if (igb_procfs_topdir_init())
+		printk(KERN_INFO "Procfs failed to initialize topdir\n");
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON  */
+
+#ifdef IGB_DCA
+	dca_register_notify(&dca_notifier);
+#endif
+	ret = pci_register_driver(&igb_driver);
+#ifdef USE_REBOOT_NOTIFIER
+	if (ret >= 0) {
+		register_reboot_notifier(&igb_notifier_reboot);
+	}
+#endif
+	return ret;
+}
+
+#undef module_init
+#define module_init(x) static int x(void)  __attribute__((__unused__));
+module_init(igb_init_module);
+
+/**
+ * igb_exit_module - Driver Exit Cleanup Routine
+ *
+ * igb_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit igb_exit_module(void)
+{
+#ifdef IGB_DCA
+	dca_unregister_notify(&dca_notifier);
+#endif
+#ifdef USE_REBOOT_NOTIFIER
+	unregister_reboot_notifier(&igb_notifier_reboot);
+#endif
+	pci_unregister_driver(&igb_driver);
+
+#ifdef IGB_HWMON
+/* only compile IGB_PROCFS if IGB_HWMON is not defined */
+#else
+#ifdef IGB_PROCFS
+	igb_procfs_topdir_exit();
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+}
+
+#undef module_exit
+#define module_exit(x) static void x(void)  __attribute__((__unused__));
+module_exit(igb_exit_module);
+
+#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
+/**
+ * igb_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ **/
+static void igb_cache_ring_register(struct igb_adapter *adapter)
+{
+	int i = 0, j = 0;
+	u32 rbase_offset = adapter->vfs_allocated_count;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		/* The queues are allocated for virtualization such that VF 0
+		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
+		 * In order to avoid collision we start at the first free queue
+		 * and continue consuming queues in the same sequence
+		 */
+		if ((adapter->rss_queues > 1) && adapter->vmdq_pools) {
+			for (; i < adapter->rss_queues; i++)
+				adapter->rx_ring[i]->reg_idx = rbase_offset +
+				                               Q_IDX_82576(i);
+		}
+	case e1000_82575:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+	default:
+		for (; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
+		for (; j < adapter->num_tx_queues; j++)
+			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
+		break;
+	}
+}
+
+static void igb_configure_lli(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 port;
+
+	/* LLI should only be enabled for MSI-X or MSI interrupts */
+	if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI))
+		return;
+
+	if (adapter->lli_port) {
+		/* use filter 0 for port */
+		port = htons((u16)adapter->lli_port);
+		E1000_WRITE_REG(hw, E1000_IMIR(0),
+			(port | E1000_IMIR_PORT_IM_EN));
+		E1000_WRITE_REG(hw, E1000_IMIREXT(0),
+			(E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
+	}
+
+	if (adapter->flags & IGB_FLAG_LLI_PUSH) {
+		/* use filter 1 for push flag */
+		E1000_WRITE_REG(hw, E1000_IMIR(1),
+			(E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
+		E1000_WRITE_REG(hw, E1000_IMIREXT(1),
+			(E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH));
+	}
+
+	if (adapter->lli_size) {
+		/* use filter 2 for size */
+		E1000_WRITE_REG(hw, E1000_IMIR(2),
+			(E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
+		E1000_WRITE_REG(hw, E1000_IMIREXT(2),
+			(adapter->lli_size | E1000_IMIREXT_CTRL_BP));
+	}
+
+}
+
+/**
+ *  igb_write_ivar - configure ivar for given MSI-X vector
+ *  @hw: pointer to the HW structure
+ *  @msix_vector: vector number we are allocating to a given ring
+ *  @index: row index of IVAR register to write within IVAR table
+ *  @offset: column offset of in IVAR, should be multiple of 8
+ *
+ *  This function is intended to handle the writing of the IVAR register
+ *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
+ *  each containing an cause allocation for an Rx and Tx ring, and a
+ *  variable number of rows depending on the number of queues supported.
+ **/
+static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
+			   int index, int offset)
+{
+	u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+	/* clear any bits that are currently set */
+	ivar &= ~((u32)0xFF << offset);
+
+	/* write vector and valid bit */
+	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+	E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
+}
+
+#define IGB_N0_QUEUE -1
+static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	int rx_queue = IGB_N0_QUEUE;
+	int tx_queue = IGB_N0_QUEUE;
+	u32 msixbm = 0;
+
+	if (q_vector->rx.ring)
+		rx_queue = q_vector->rx.ring->reg_idx;
+	if (q_vector->tx.ring)
+		tx_queue = q_vector->tx.ring->reg_idx;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+		/* The 82575 assigns vectors using a bitmask, which matches the
+		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
+		   or more queues to a vector, we write the appropriate bits
+		   into the MSIXBM register for that vector. */
+		if (rx_queue > IGB_N0_QUEUE)
+			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
+		if (tx_queue > IGB_N0_QUEUE)
+			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
+		if (!adapter->msix_entries && msix_vector == 0)
+			msixbm |= E1000_EIMS_OTHER;
+		E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm);
+		q_vector->eims_value = msixbm;
+		break;
+	case e1000_82576:
+		/*
+		 * 82576 uses a table that essentially consists of 2 columns
+		 * with 8 rows.  The ordering is column-major so we use the
+		 * lower 3 bits as the row index, and the 4th bit as the
+		 * column offset.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       rx_queue & 0x7,
+				       (rx_queue & 0x8) << 1);
+		if (tx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       tx_queue & 0x7,
+				       ((tx_queue & 0x8) << 1) + 8);
+		q_vector->eims_value = 1 << msix_vector;
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/*
+		 * On 82580 and newer adapters the scheme is similar to 82576
+		 * however instead of ordering column-major we have things
+		 * ordered row-major.  So we traverse the table by using
+		 * bit 0 as the column offset, and the remaining bits as the
+		 * row index.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       rx_queue >> 1,
+				       (rx_queue & 0x1) << 4);
+		if (tx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       tx_queue >> 1,
+				       ((tx_queue & 0x1) << 4) + 8);
+		q_vector->eims_value = 1 << msix_vector;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	/* add q_vector eims value to global eims_enable_mask */
+	adapter->eims_enable_mask |= q_vector->eims_value;
+
+	/* configure q_vector to set itr on first interrupt */
+	q_vector->set_itr = 1;
+}
+
+/**
+ * igb_configure_msix - Configure MSI-X hardware
+ *
+ * igb_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ **/
+static void igb_configure_msix(struct igb_adapter *adapter)
+{
+	u32 tmp;
+	int i, vector = 0;
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->eims_enable_mask = 0;
+
+	/* set vector for other causes, i.e. link changes */
+	switch (hw->mac.type) {
+	case e1000_82575:
+		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
+		/* enable MSI-X PBA support*/
+		tmp |= E1000_CTRL_EXT_PBA_CLR;
+
+		/* Auto-Mask interrupts upon ICR read. */
+		tmp |= E1000_CTRL_EXT_EIAME;
+		tmp |= E1000_CTRL_EXT_IRCA;
+
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
+
+		/* enable msix_other interrupt */
+		E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++,
+		                      E1000_EIMS_OTHER);
+		adapter->eims_other = E1000_EIMS_OTHER;
+
+		break;
+
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/* Turn on MSI-X capability first, or our settings
+		 * won't stick.  And it will take days to debug. */
+		E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
+		                E1000_GPIE_NSICR);
+
+		/* enable msix_other interrupt */
+		adapter->eims_other = 1 << vector;
+		tmp = (vector++ | E1000_IVAR_VALID) << 8;
+
+		E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
+		break;
+	default:
+		/* do nothing, since nothing else supports MSI-X */
+		break;
+	} /* switch (hw->mac.type) */
+
+	adapter->eims_enable_mask |= adapter->eims_other;
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		igb_assign_vector(adapter->q_vector[i], vector++);
+
+	E1000_WRITE_FLUSH(hw);
+}
+
+/**
+ * igb_request_msix - Initialize MSI-X interrupts
+ *
+ * igb_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ **/
+static int igb_request_msix(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	int i, err = 0, vector = 0, free_vector = 0;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+	                  &igb_msix_other, 0, netdev->name, adapter);
+	if (err)
+		goto err_out;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igb_q_vector *q_vector = adapter->q_vector[i];
+
+		vector++;
+
+		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
+
+		if (q_vector->rx.ring && q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
+			        q_vector->rx.ring->queue_index);
+		else if (q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
+			        q_vector->tx.ring->queue_index);
+		else if (q_vector->rx.ring)
+			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
+			        q_vector->rx.ring->queue_index);
+		else
+			sprintf(q_vector->name, "%s-unused", netdev->name);
+
+		err = request_irq(adapter->msix_entries[vector].vector,
+		                  igb_msix_ring, 0, q_vector->name,
+		                  q_vector);
+		if (err)
+			goto err_free;
+	}
+
+	igb_configure_msix(adapter);
+	return 0;
+
+err_free:
+	/* free already assigned IRQs */
+	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+	vector--;
+	for (i = 0; i < vector; i++) {
+		free_irq(adapter->msix_entries[free_vector++].vector,
+			 adapter->q_vector[i]);
+	}
+err_out:
+	return err;
+}
+
+static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
+{
+	if (adapter->msix_entries) {
+		pci_disable_msix(adapter->pdev);
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
+		pci_disable_msi(adapter->pdev);
+	}
+}
+
+/**
+ * igb_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
+{
+	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	if (q_vector->tx.ring)
+		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+	if (q_vector->rx.ring)
+		adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+	adapter->q_vector[v_idx] = NULL;
+	netif_napi_del(&q_vector->napi);
+#ifndef IGB_NO_LRO
+	__skb_queue_purge(&q_vector->lrolist.active);
+#endif
+	kfree(q_vector);
+}
+
+/**
+ * igb_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void igb_free_q_vectors(struct igb_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		igb_free_q_vector(adapter, v_idx);
+}
+
+/**
+ * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
+ *
+ * This function resets the device so that it has 0 rx queues, tx queues, and
+ * MSI-X interrupts allocated.
+ */
+static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
+{
+	igb_free_q_vectors(adapter);
+	igb_reset_interrupt_capability(adapter);
+}
+
+/**
+ * igb_process_mdd_event
+ * @adapter - board private structure
+ *
+ * Identify a malicious VF, disable the VF TX/RX queues and log a message.
+ */
+static void igb_process_mdd_event(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 lvmmc, vfte, vfre, mdfb;
+	u8 vf_queue;
+
+	lvmmc = E1000_READ_REG(hw, E1000_LVMMC);
+	vf_queue = lvmmc >> 29;
+
+	/* VF index cannot be bigger or equal to VFs allocated */
+	if (vf_queue >= adapter->vfs_allocated_count)
+		return;
+
+	netdev_info(adapter->netdev,
+	            "VF %d misbehaved. VF queues are disabled. "
+	            "VM misbehavior code is 0x%x\n", vf_queue, lvmmc);
+
+	/* Disable VFTE and VFRE related bits */
+	vfte = E1000_READ_REG(hw, E1000_VFTE);
+	vfte &= ~(1 << vf_queue);
+	E1000_WRITE_REG(hw, E1000_VFTE, vfte);
+
+	vfre = E1000_READ_REG(hw, E1000_VFRE);
+	vfre &= ~(1 << vf_queue);
+	E1000_WRITE_REG(hw, E1000_VFRE, vfre);
+
+	/* Disable MDFB related bit. Clear on write */
+	mdfb = E1000_READ_REG(hw, E1000_MDFB);
+	mdfb |= (1 << vf_queue);
+	E1000_WRITE_REG(hw, E1000_MDFB, mdfb);
+
+	/* Reset the specific VF */
+	E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST);
+}
+
+/**
+ * igb_disable_mdd
+ * @adapter - board private structure
+ *
+ * Disable MDD behavior in the HW
+ **/
+static void igb_disable_mdd(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg;
+
+	if ((hw->mac.type != e1000_i350) ||
+	    (hw->mac.type != e1000_i354))
+		return;
+
+	reg = E1000_READ_REG(hw, E1000_DTXCTL);
+	reg &= (~E1000_DTXCTL_MDP_EN);
+	E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
+}
+
+/**
+ * igb_enable_mdd
+ * @adapter - board private structure
+ *
+ * Enable the HW to detect malicious driver and sends an interrupt to
+ * the driver.
+ **/
+static void igb_enable_mdd(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg;
+
+	/* Only available on i350 device */
+	if (hw->mac.type != e1000_i350)
+		return;
+
+	reg = E1000_READ_REG(hw, E1000_DTXCTL);
+	reg |= E1000_DTXCTL_MDP_EN;
+	E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
+}
+
+/**
+ * igb_reset_sriov_capability - disable SR-IOV if enabled
+ *
+ * Attempt to disable single root IO virtualization capabilites present in the
+ * kernel.
+ **/
+static void igb_reset_sriov_capability(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* reclaim resources allocated to VFs */
+	if (adapter->vf_data) {
+		if (!pci_vfs_assigned(pdev)) {
+			/*
+			 * disable iov and allow time for transactions to
+			 * clear
+			 */
+			pci_disable_sriov(pdev);
+			msleep(500);
+
+			dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n");
+		} else {
+			dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled\n "
+					"VF(s) are assigned to guests!\n");
+		}
+		/* Disable Malicious Driver Detection */
+		igb_disable_mdd(adapter);
+
+		/* free vf data storage */
+		kfree(adapter->vf_data);
+		adapter->vf_data = NULL;
+
+		/* switch rings back to PF ownership */
+		E1000_WRITE_REG(hw, E1000_IOVCTL,
+				E1000_IOVCTL_REUSE_VFQ);
+		E1000_WRITE_FLUSH(hw);
+		msleep(100);
+	}
+
+	adapter->vfs_allocated_count = 0;
+}
+
+/**
+ * igb_set_sriov_capability - setup SR-IOV if supported
+ *
+ * Attempt to enable single root IO virtualization capabilites present in the
+ * kernel.
+ **/
+static void igb_set_sriov_capability(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int old_vfs = 0;
+	int i;
+
+	old_vfs = pci_num_vf(pdev);
+	if (old_vfs) {
+		dev_info(pci_dev_to_dev(pdev),
+				"%d pre-allocated VFs found - override "
+				"max_vfs setting of %d\n", old_vfs,
+				adapter->vfs_allocated_count);
+		adapter->vfs_allocated_count = old_vfs;
+	}
+	/* no VFs requested, do nothing */
+	if (!adapter->vfs_allocated_count)
+		return;
+
+	/* allocate vf data storage */
+	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
+	                           sizeof(struct vf_data_storage),
+	                           GFP_KERNEL);
+
+	if (adapter->vf_data) {
+		if (!old_vfs) {
+			if (pci_enable_sriov(pdev,
+					adapter->vfs_allocated_count))
+				goto err_out;
+		}
+		for (i = 0; i < adapter->vfs_allocated_count; i++)
+			igb_vf_configure(adapter, i);
+
+		switch (adapter->hw.mac.type) {
+		case e1000_82576:
+		case e1000_i350:
+			/* Enable VM to VM loopback by default */
+			adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE;
+			break;
+		default:
+			/* Currently no other hardware supports loopback */
+			break;
+		}
+
+		/* DMA Coalescing is not supported in IOV mode. */
+		if (adapter->hw.mac.type >= e1000_i350)
+		adapter->dmac = IGB_DMAC_DISABLE;
+		if (adapter->hw.mac.type < e1000_i350)
+		adapter->flags |= IGB_FLAG_DETECT_BAD_DMA;
+		return;
+
+	}
+
+err_out:
+	kfree(adapter->vf_data);
+	adapter->vf_data = NULL;
+	adapter->vfs_allocated_count = 0;
+	dev_warn(pci_dev_to_dev(pdev),
+			"Failed to initialize SR-IOV virtualization\n");
+}
+
+/**
+ * igb_set_interrupt_capability - set MSI or MSI-X if supported
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+	int numvecs, i;
+
+	if (!msix)
+		adapter->int_mode = IGB_INT_MODE_MSI;
+
+	/* Number of supported queues. */
+	adapter->num_rx_queues = adapter->rss_queues;
+
+	if (adapter->vmdq_pools > 1)
+		adapter->num_rx_queues += adapter->vmdq_pools - 1;
+
+#ifdef HAVE_TX_MQ
+	if (adapter->vmdq_pools)
+		adapter->num_tx_queues = adapter->vmdq_pools;
+	else
+		adapter->num_tx_queues = adapter->num_rx_queues;
+#else
+	adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools);
+#endif
+
+	switch (adapter->int_mode) {
+	case IGB_INT_MODE_MSIX:
+		/* start with one vector for every rx queue */
+		numvecs = adapter->num_rx_queues;
+
+		/* if tx handler is separate add 1 for every tx queue */
+		if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
+			numvecs += adapter->num_tx_queues;
+
+		/* store the number of vectors reserved for queues */
+		adapter->num_q_vectors = numvecs;
+
+		/* add 1 vector for link status interrupts */
+		numvecs++;
+		adapter->msix_entries = kcalloc(numvecs,
+		                                sizeof(struct msix_entry),
+		                                GFP_KERNEL);
+		if (adapter->msix_entries) {
+			for (i = 0; i < numvecs; i++)
+				adapter->msix_entries[i].entry = i;
+
+			err = pci_enable_msix(pdev,
+			                      adapter->msix_entries, numvecs);
+			if (err == 0)
+				break;
+		}
+		/* MSI-X failed, so fall through and try MSI */
+		dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. "
+		         "Falling back to MSI interrupts.\n");
+		igb_reset_interrupt_capability(adapter);
+	case IGB_INT_MODE_MSI:
+		if (!pci_enable_msi(pdev))
+			adapter->flags |= IGB_FLAG_HAS_MSI;
+		else
+			dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI "
+			         "interrupts.  Falling back to legacy "
+			         "interrupts.\n");
+		/* Fall through */
+	case IGB_INT_MODE_LEGACY:
+		/* disable advanced features and set number of queues to 1 */
+		igb_reset_sriov_capability(adapter);
+		adapter->vmdq_pools = 0;
+		adapter->rss_queues = 1;
+		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
+		adapter->num_rx_queues = 1;
+		adapter->num_tx_queues = 1;
+		adapter->num_q_vectors = 1;
+		/* Don't do anything; this is system default */
+		break;
+	}
+}
+
+static void igb_add_ring(struct igb_ring *ring,
+			 struct igb_ring_container *head)
+{
+	head->ring = ring;
+	head->count++;
+}
+
+/**
+ * igb_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int igb_alloc_q_vector(struct igb_adapter *adapter,
+			      unsigned int v_count, unsigned int v_idx,
+			      unsigned int txr_count, unsigned int txr_idx,
+			      unsigned int rxr_count, unsigned int rxr_idx)
+{
+	struct igb_q_vector *q_vector;
+	struct igb_ring *ring;
+	int ring_count, size;
+
+	/* igb only supports 1 Tx and/or 1 Rx queue per vector */
+	if (txr_count > 1 || rxr_count > 1)
+		return -ENOMEM;
+
+	ring_count = txr_count + rxr_count;
+	size = sizeof(struct igb_q_vector) +
+	       (sizeof(struct igb_ring) * ring_count);
+
+	/* allocate q_vector and rings */
+	q_vector = kzalloc(size, GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+#ifndef IGB_NO_LRO
+	/* initialize LRO */
+	__skb_queue_head_init(&q_vector->lrolist.active);
+
+#endif
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi,
+		       igb_poll, 64);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+
+	/* initialize work limits */
+	q_vector->tx.work_limit = adapter->tx_work_limit;
+
+	/* initialize ITR configuration */
+	q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0);
+	q_vector->itr_val = IGB_START_ITR;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	/* intialize ITR */
+	if (rxr_count) {
+		/* rx or rx/tx vector */
+		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+			q_vector->itr_val = adapter->rx_itr_setting;
+	} else {
+		/* tx only vector */
+		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+			q_vector->itr_val = adapter->tx_itr_setting;
+	}
+
+	if (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		igb_add_ring(ring, &q_vector->tx);
+
+		/* For 82575, context index must be unique per ring. */
+		if (adapter->hw.mac.type == e1000_82575)
+			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+
+		/* assign ring to adapter */
+		adapter->tx_ring[txr_idx] = ring;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	if (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		igb_add_ring(ring, &q_vector->rx);
+
+#ifndef HAVE_NDO_SET_FEATURES
+		/* enable rx checksum */
+		set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
+
+#endif
+		/* set flag indicating ring supports SCTP checksum offload */
+		if (adapter->hw.mac.type >= e1000_82576)
+			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
+
+		if ((adapter->hw.mac.type == e1000_i350) ||
+		    (adapter->hw.mac.type == e1000_i354))
+			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
+
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		/* assign ring to adapter */
+		adapter->rx_ring[rxr_idx] = ring;
+	}
+
+	return 0;
+}
+
+/**
+ * igb_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int igb_alloc_q_vectors(struct igb_adapter *adapter)
+{
+	int q_vectors = adapter->num_q_vectors;
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int err;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+		for (; rxr_remaining; v_idx++) {
+			err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
+						 0, 0, 1, rxr_idx);
+
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining--;
+			rxr_idx++;
+		}
+	}
+
+	for (; v_idx < q_vectors; v_idx++) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+		err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
+					 tqpv, txr_idx, rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		rxr_idx++;
+		txr_idx++;
+	}
+
+	return 0;
+
+err_out:
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		igb_free_q_vector(adapter, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ *
+ * This function initializes the interrupts and allocates all of the queues.
+ **/
+static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+
+	igb_set_interrupt_capability(adapter, msix);
+
+	err = igb_alloc_q_vectors(adapter);
+	if (err) {
+		dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	igb_cache_ring_register(adapter);
+
+	return 0;
+
+err_alloc_q_vectors:
+	igb_reset_interrupt_capability(adapter);
+	return err;
+}
+
+/**
+ * igb_request_irq - initialize interrupts
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int igb_request_irq(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (adapter->msix_entries) {
+		err = igb_request_msix(adapter);
+		if (!err)
+			goto request_done;
+		/* fall back to MSI */
+		igb_free_all_tx_resources(adapter);
+		igb_free_all_rx_resources(adapter);
+
+		igb_clear_interrupt_scheme(adapter);
+		igb_reset_sriov_capability(adapter);
+		err = igb_init_interrupt_scheme(adapter, false);
+		if (err)
+			goto request_done;
+		igb_setup_all_tx_resources(adapter);
+		igb_setup_all_rx_resources(adapter);
+		igb_configure(adapter);
+	}
+
+	igb_assign_vector(adapter->q_vector[0], 0);
+
+	if (adapter->flags & IGB_FLAG_HAS_MSI) {
+		err = request_irq(pdev->irq, &igb_intr_msi, 0,
+				  netdev->name, adapter);
+		if (!err)
+			goto request_done;
+
+		/* fall back to legacy interrupts */
+		igb_reset_interrupt_capability(adapter);
+		adapter->flags &= ~IGB_FLAG_HAS_MSI;
+	}
+
+	err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED,
+			  netdev->name, adapter);
+
+	if (err)
+		dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n",
+			err);
+
+request_done:
+	return err;
+}
+
+static void igb_free_irq(struct igb_adapter *adapter)
+{
+	if (adapter->msix_entries) {
+		int vector = 0, i;
+
+		free_irq(adapter->msix_entries[vector++].vector, adapter);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			free_irq(adapter->msix_entries[vector++].vector,
+			         adapter->q_vector[i]);
+	} else {
+		free_irq(adapter->pdev->irq, adapter);
+	}
+}
+
+/**
+ * igb_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void igb_irq_disable(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/*
+	 * we need to be careful when disabling interrupts.  The VFs are also
+	 * mapped into these registers and so clearing the bits can cause
+	 * issues on the VF drivers so we only need to clear what we set
+	 */
+	if (adapter->msix_entries) {
+		u32 regval = E1000_READ_REG(hw, E1000_EIAM);
+		E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask);
+		E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask);
+		regval = E1000_READ_REG(hw, E1000_EIAC);
+		E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask);
+	}
+
+	E1000_WRITE_REG(hw, E1000_IAM, 0);
+	E1000_WRITE_REG(hw, E1000_IMC, ~0);
+	E1000_WRITE_FLUSH(hw);
+
+	if (adapter->msix_entries) {
+		int vector = 0, i;
+
+		synchronize_irq(adapter->msix_entries[vector++].vector);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			synchronize_irq(adapter->msix_entries[vector++].vector);
+	} else {
+		synchronize_irq(adapter->pdev->irq);
+	}
+}
+
+/**
+ * igb_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void igb_irq_enable(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries) {
+		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
+		u32 regval = E1000_READ_REG(hw, E1000_EIAC);
+		E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask);
+		regval = E1000_READ_REG(hw, E1000_EIAM);
+		E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask);
+		E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask);
+		if (adapter->vfs_allocated_count) {
+			E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF);
+			ims |= E1000_IMS_VMMB;
+			if (adapter->mdd)
+				if ((adapter->hw.mac.type == e1000_i350) ||
+				    (adapter->hw.mac.type == e1000_i354))
+				ims |= E1000_IMS_MDDET;
+		}
+		E1000_WRITE_REG(hw, E1000_IMS, ims);
+	} else {
+		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK |
+				E1000_IMS_DRSTA);
+		E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK |
+				E1000_IMS_DRSTA);
+	}
+}
+
+static void igb_update_mng_vlan(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 vid = adapter->hw.mng_cookie.vlan_id;
+	u16 old_vid = adapter->mng_vlan_id;
+
+	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
+		/* add VID to filter table */
+		igb_vfta_set(adapter, vid, TRUE);
+		adapter->mng_vlan_id = vid;
+	} else {
+		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
+	}
+
+	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
+	    (vid != old_vid) &&
+#ifdef HAVE_VLAN_RX_REGISTER
+	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
+#else
+	    !test_bit(old_vid, adapter->active_vlans)) {
+#endif
+		/* remove VID from filter table */
+		igb_vfta_set(adapter, old_vid, FALSE);
+	}
+}
+
+/**
+ * igb_release_hw_control - release control of the h/w to f/w
+ * @adapter: address of board private structure
+ *
+ * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that the
+ * driver is no longer loaded.
+ *
+ **/
+static void igb_release_hw_control(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware take over control of h/w */
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT,
+			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igb_get_hw_control - get control of the h/w from f/w
+ * @adapter: address of board private structure
+ *
+ * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that
+ * the driver is loaded.
+ *
+ **/
+static void igb_get_hw_control(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware know the driver has taken over */
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	E1000_WRITE_REG(hw, E1000_CTRL_EXT,
+			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igb_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ **/
+static void igb_configure(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	igb_get_hw_control(adapter);
+	igb_set_rx_mode(netdev);
+
+	igb_restore_vlan(adapter);
+
+	igb_setup_tctl(adapter);
+	igb_setup_mrqc(adapter);
+	igb_setup_rctl(adapter);
+
+	igb_configure_tx(adapter);
+	igb_configure_rx(adapter);
+
+	e1000_rx_fifo_flush_82575(&adapter->hw);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+	if (adapter->num_tx_queues > 1)
+		netdev->features |= NETIF_F_MULTI_QUEUE;
+	else
+		netdev->features &= ~NETIF_F_MULTI_QUEUE;
+#endif
+
+	/* call igb_desc_unused which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igb_ring *ring = adapter->rx_ring[i];
+		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
+	}
+}
+
+/**
+ * igb_power_up_link - Power up the phy/serdes link
+ * @adapter: address of board private structure
+ **/
+void igb_power_up_link(struct igb_adapter *adapter)
+{
+	e1000_phy_hw_reset(&adapter->hw);
+
+	if (adapter->hw.phy.media_type == e1000_media_type_copper)
+		e1000_power_up_phy(&adapter->hw);
+	else
+		e1000_power_up_fiber_serdes_link(&adapter->hw);
+}
+
+/**
+ * igb_power_down_link - Power down the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igb_power_down_link(struct igb_adapter *adapter)
+{
+	if (adapter->hw.phy.media_type == e1000_media_type_copper)
+		e1000_power_down_phy(&adapter->hw);
+	else
+		e1000_shutdown_fiber_serdes_link(&adapter->hw);
+}
+
+/* Detect and switch function for Media Auto Sense */
+static void igb_check_swap_media(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext, connsw;
+	bool swap_now = false;
+	bool link;
+
+	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	connsw = E1000_READ_REG(hw, E1000_CONNSW);
+	link = igb_has_link(adapter);
+
+	/* need to live swap if current media is copper and we have fiber/serdes
+	 * to go to.
+	 */
+
+	if ((hw->phy.media_type == e1000_media_type_copper) &&
+	    (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
+		swap_now = true;
+	} else if (!(connsw & E1000_CONNSW_SERDESD)) {
+		/* copper signal takes time to appear */
+		if (adapter->copper_tries < 2) {
+			adapter->copper_tries++;
+			connsw |= E1000_CONNSW_AUTOSENSE_CONF;
+			E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
+			return;
+		} else {
+			adapter->copper_tries = 0;
+			if ((connsw & E1000_CONNSW_PHYSD) &&
+			    (!(connsw & E1000_CONNSW_PHY_PDN))) {
+				swap_now = true;
+				connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
+				E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
+			}
+		}
+	}
+
+	if (swap_now) {
+		switch (hw->phy.media_type) {
+		case e1000_media_type_copper:
+			dev_info(pci_dev_to_dev(adapter->pdev),
+				 "%s:MAS: changing media to fiber/serdes\n",
+			adapter->netdev->name);
+			ctrl_ext |=
+				E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+			adapter->flags |= IGB_FLAG_MEDIA_RESET;
+			adapter->copper_tries = 0;
+			break;
+		case e1000_media_type_internal_serdes:
+		case e1000_media_type_fiber:
+			dev_info(pci_dev_to_dev(adapter->pdev),
+				 "%s:MAS: changing media to copper\n",
+				 adapter->netdev->name);
+			ctrl_ext &=
+				~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+			adapter->flags |= IGB_FLAG_MEDIA_RESET;
+			break;
+		default:
+			/* shouldn't get here during regular operation */
+			dev_err(pci_dev_to_dev(adapter->pdev),
+				"%s:AMS: Invalid media type found, returning\n",
+				adapter->netdev->name);
+			break;
+		}
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
+	}
+}
+
+#ifdef HAVE_I2C_SUPPORT
+/*  igb_get_i2c_data - Reads the I2C SDA data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Returns the I2C data bit value
+ */
+static int igb_get_i2c_data(void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	return (i2cctl & E1000_I2C_DATA_IN) != 0;
+}
+
+/* igb_set_i2c_data - Sets the I2C data bit
+ *  @data: pointer to hardware structure
+ *  @state: I2C data value (0 or 1) to set
+ *
+ *  Sets the I2C data bit
+ */
+static void igb_set_i2c_data(void *data, int state)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	if (state)
+		i2cctl |= E1000_I2C_DATA_OUT;
+	else
+		i2cctl &= ~E1000_I2C_DATA_OUT;
+
+	i2cctl &= ~E1000_I2C_DATA_OE_N;
+	i2cctl |= E1000_I2C_CLK_OE_N;
+
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
+	E1000_WRITE_FLUSH(hw);
+
+}
+
+/* igb_set_i2c_clk - Sets the I2C SCL clock
+ *  @data: pointer to hardware structure
+ *  @state: state to set clock
+ *
+ *  Sets the I2C clock line to state
+ */
+static void igb_set_i2c_clk(void *data, int state)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	if (state) {
+		i2cctl |= E1000_I2C_CLK_OUT;
+		i2cctl &= ~E1000_I2C_CLK_OE_N;
+	} else {
+		i2cctl &= ~E1000_I2C_CLK_OUT;
+		i2cctl &= ~E1000_I2C_CLK_OE_N;
+	}
+	E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl);
+	E1000_WRITE_FLUSH(hw);
+}
+
+/* igb_get_i2c_clk - Gets the I2C SCL clock state
+ *  @data: pointer to hardware structure
+ *
+ *  Gets the I2C clock state
+ */
+static int igb_get_i2c_clk(void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
+
+	return (i2cctl & E1000_I2C_CLK_IN) != 0;
+}
+
+static const struct i2c_algo_bit_data igb_i2c_algo = {
+	.setsda		= igb_set_i2c_data,
+	.setscl		= igb_set_i2c_clk,
+	.getsda		= igb_get_i2c_data,
+	.getscl		= igb_get_i2c_clk,
+	.udelay		= 5,
+	.timeout	= 20,
+};
+
+/*  igb_init_i2c - Init I2C interface
+ *  @adapter: pointer to adapter structure
+ *
+ */
+static s32 igb_init_i2c(struct igb_adapter *adapter)
+{
+	s32 status = E1000_SUCCESS;
+
+	/* I2C interface supported on i350 devices */
+	if (adapter->hw.mac.type != e1000_i350)
+		return E1000_SUCCESS;
+
+	/* Initialize the i2c bus which is controlled by the registers.
+	 * This bus will use the i2c_algo_bit structue that implements
+	 * the protocol through toggling of the 4 bits in the register.
+	 */
+	adapter->i2c_adap.owner = THIS_MODULE;
+	adapter->i2c_algo = igb_i2c_algo;
+	adapter->i2c_algo.data = adapter;
+	adapter->i2c_adap.algo_data = &adapter->i2c_algo;
+	adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
+	strlcpy(adapter->i2c_adap.name, "igb BB",
+		sizeof(adapter->i2c_adap.name));
+	status = i2c_bit_add_bus(&adapter->i2c_adap);
+	return status;
+}
+
+#endif /* HAVE_I2C_SUPPORT */
+/**
+ * igb_up - Open the interface and prepare it to handle traffic
+ * @adapter: board private structure
+ **/
+int igb_up(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	/* hardware has been reset, we need to reload some things */
+	igb_configure(adapter);
+
+	clear_bit(__IGB_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&(adapter->q_vector[i]->napi));
+
+	if (adapter->msix_entries)
+		igb_configure_msix(adapter);
+	else
+		igb_assign_vector(adapter->q_vector[0], 0);
+
+	igb_configure_lli(adapter);
+
+	/* Clear any pending interrupts. */
+	E1000_READ_REG(hw, E1000_ICR);
+	igb_irq_enable(adapter);
+
+	/* notify VFs that reset has been completed */
+	if (adapter->vfs_allocated_count) {
+		u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
+		reg_data |= E1000_CTRL_EXT_PFRSTD;
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
+	}
+
+	netif_tx_start_all_queues(adapter->netdev);
+
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		schedule_work(&adapter->dma_err_task);
+	/* start the watchdog. */
+	hw->mac.get_link_status = 1;
+	schedule_work(&adapter->watchdog_task);
+
+	if ((adapter->flags & IGB_FLAG_EEE) &&
+	    (!hw->dev_spec._82575.eee_disable))
+		adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
+
+	return 0;
+}
+
+void igb_down(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tctl, rctl;
+	int i;
+
+	/* signal that we're down so the interrupt handler does not
+	 * reschedule our watchdog timer */
+	set_bit(__IGB_DOWN, &adapter->state);
+
+	/* disable receives in the hardware */
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	/* flush and sleep below */
+
+	netif_tx_stop_all_queues(netdev);
+
+	/* disable transmits in the hardware */
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	tctl &= ~E1000_TCTL_EN;
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+	/* flush both disables and wait for them to finish */
+	E1000_WRITE_FLUSH(hw);
+	usleep_range(10000, 20000);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_disable(&(adapter->q_vector[i]->napi));
+
+	igb_irq_disable(adapter);
+
+	adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+
+	del_timer_sync(&adapter->watchdog_timer);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		del_timer_sync(&adapter->dma_err_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	netif_carrier_off(netdev);
+
+	/* record the stats before reset*/
+	igb_update_stats(adapter);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
+#ifdef HAVE_PCI_ERS
+	if (!pci_channel_offline(adapter->pdev))
+		igb_reset(adapter);
+#else
+	igb_reset(adapter);
+#endif
+	igb_clean_all_tx_rings(adapter);
+	igb_clean_all_rx_rings(adapter);
+#ifdef IGB_DCA
+	/* since we reset the hardware DCA settings were cleared */
+	igb_setup_dca(adapter);
+#endif
+}
+
+void igb_reinit_locked(struct igb_adapter *adapter)
+{
+	WARN_ON(in_interrupt());
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	igb_down(adapter);
+	igb_up(adapter);
+	clear_bit(__IGB_RESETTING, &adapter->state);
+}
+
+/**
+ * igb_enable_mas - Media Autosense re-enable after swap
+ *
+ * @adapter: adapter struct
+ **/
+static s32  igb_enable_mas(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 connsw;
+	s32 ret_val = E1000_SUCCESS;
+
+	connsw = E1000_READ_REG(hw, E1000_CONNSW);
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		/* configure for SerDes media detect */
+		if (!(connsw & E1000_CONNSW_SERDESD)) {
+			connsw |= E1000_CONNSW_ENRGSRC;
+			connsw |= E1000_CONNSW_AUTOSENSE_EN;
+			E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
+			E1000_WRITE_FLUSH(hw);
+		} else if (connsw & E1000_CONNSW_SERDESD) {
+			/* already SerDes, no need to enable anything */
+			return ret_val;
+		} else {
+			dev_info(pci_dev_to_dev(adapter->pdev),
+			"%s:MAS: Unable to configure feature, disabling..\n",
+			adapter->netdev->name);
+			adapter->flags &= ~IGB_FLAG_MAS_ENABLE;
+		}
+	}
+	return ret_val;
+}
+
+void igb_reset(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_fc_info *fc = &hw->fc;
+	u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm;
+
+	/* Repartition Pba for greater than 9k mtu
+	 * To take effect CTRL.RST is required.
+	 */
+	switch (mac->type) {
+	case e1000_i350:
+	case e1000_82580:
+	case e1000_i354:
+		pba = E1000_READ_REG(hw, E1000_RXPBS);
+		pba = e1000_rxpbs_adjust_82580(pba);
+		break;
+	case e1000_82576:
+		pba = E1000_READ_REG(hw, E1000_RXPBS);
+		pba &= E1000_RXPBS_SIZE_MASK_82576;
+		break;
+	case e1000_82575:
+	case e1000_i210:
+	case e1000_i211:
+	default:
+		pba = E1000_PBA_34K;
+		break;
+	}
+
+	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
+	    (mac->type < e1000_82576)) {
+		/* adjust PBA for jumbo frames */
+		E1000_WRITE_REG(hw, E1000_PBA, pba);
+
+		/* To maintain wire speed transmits, the Tx FIFO should be
+		 * large enough to accommodate two full transmit packets,
+		 * rounded up to the next 1KB and expressed in KB.  Likewise,
+		 * the Rx FIFO should be large enough to accommodate at least
+		 * one full receive packet and is similarly rounded up and
+		 * expressed in KB. */
+		pba = E1000_READ_REG(hw, E1000_PBA);
+		/* upper 16 bits has Tx packet buffer allocation size in KB */
+		tx_space = pba >> 16;
+		/* lower 16 bits has Rx packet buffer allocation size in KB */
+		pba &= 0xffff;
+		/* the tx fifo also stores 16 bytes of information about the tx
+		 * but don't include ethernet FCS because hardware appends it */
+		min_tx_space = (adapter->max_frame_size +
+				sizeof(union e1000_adv_tx_desc) -
+				ETH_FCS_LEN) * 2;
+		min_tx_space = ALIGN(min_tx_space, 1024);
+		min_tx_space >>= 10;
+		/* software strips receive CRC, so leave room for it */
+		min_rx_space = adapter->max_frame_size;
+		min_rx_space = ALIGN(min_rx_space, 1024);
+		min_rx_space >>= 10;
+
+		/* If current Tx allocation is less than the min Tx FIFO size,
+		 * and the min Tx FIFO size is less than the current Rx FIFO
+		 * allocation, take space away from current Rx allocation */
+		if (tx_space < min_tx_space &&
+		    ((min_tx_space - tx_space) < pba)) {
+			pba = pba - (min_tx_space - tx_space);
+
+			/* if short on rx space, rx wins and must trump tx
+			 * adjustment */
+			if (pba < min_rx_space)
+				pba = min_rx_space;
+		}
+		E1000_WRITE_REG(hw, E1000_PBA, pba);
+	}
+
+	/* flow control settings */
+	/* The high water mark must be low enough to fit one full frame
+	 * (or the size used for early receive) above it in the Rx FIFO.
+	 * Set it to the lower of:
+	 * - 90% of the Rx FIFO size, or
+	 * - the full Rx FIFO size minus one full frame */
+	hwm = min(((pba << 10) * 9 / 10),
+			((pba << 10) - 2 * adapter->max_frame_size));
+
+	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
+	fc->low_water = fc->high_water - 16;
+	fc->pause_time = 0xFFFF;
+	fc->send_xon = 1;
+	fc->current_mode = fc->requested_mode;
+
+	/* disable receive for all VFs and wait one second */
+	if (adapter->vfs_allocated_count) {
+		int i;
+		/*
+		 * Clear all flags except indication that the PF has set
+		 * the VF MAC addresses administratively
+		 */
+		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
+			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
+
+		/* ping all the active vfs to let them know we are going down */
+		igb_ping_all_vfs(adapter);
+
+		/* disable transmits and receives */
+		E1000_WRITE_REG(hw, E1000_VFRE, 0);
+		E1000_WRITE_REG(hw, E1000_VFTE, 0);
+	}
+
+	/* Allow time for pending master requests to run */
+	e1000_reset_hw(hw);
+	E1000_WRITE_REG(hw, E1000_WUC, 0);
+
+	if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+		e1000_setup_init_funcs(hw, TRUE);
+		igb_check_options(adapter);
+		e1000_get_bus_info(hw);
+		adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
+	}
+	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+		if (igb_enable_mas(adapter))
+			dev_err(pci_dev_to_dev(pdev),
+				"Error enabling Media Auto Sense\n");
+	}
+	if (e1000_init_hw(hw))
+		dev_err(pci_dev_to_dev(pdev), "Hardware Error\n");
+
+	/*
+	 * Flow control settings reset on hardware reset, so guarantee flow
+	 * control is off when forcing speed.
+	 */
+	if (!hw->mac.autoneg)
+		e1000_force_mac_fc(hw);
+
+	igb_init_dmac(adapter, pba);
+	/* Re-initialize the thermal sensor on i350 devices. */
+	if (mac->type == e1000_i350 && hw->bus.func == 0) {
+		/*
+		 * If present, re-initialize the external thermal sensor
+		 * interface.
+		 */
+		if (adapter->ets)
+			e1000_set_i2c_bb(hw);
+		e1000_init_thermal_sensor_thresh(hw);
+	}
+
+	/*Re-establish EEE setting */
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		switch (mac->type) {
+		case e1000_i350:
+		case e1000_i210:
+		case e1000_i211:
+			e1000_set_eee_i350(hw);
+			break;
+		case e1000_i354:
+			e1000_set_eee_i354(hw);
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (!netif_running(adapter->netdev))
+		igb_power_down_link(adapter);
+
+	igb_update_mng_vlan(adapter);
+
+	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
+	E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
+
+
+#ifdef HAVE_PTP_1588_CLOCK
+	/* Re-enable PTP, where applicable. */
+	igb_ptp_reset(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	e1000_get_phy_info(hw);
+
+	adapter->devrc++;
+}
+
+#ifdef HAVE_NDO_SET_FEATURES
+static kni_netdev_features_t igb_fix_features(struct net_device *netdev,
+					      kni_netdev_features_t features)
+{
+	/*
+	 * Since there is no support for separate tx vlan accel
+	 * enabled make sure tx flag is cleared if rx is.
+	 */
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+	if (!(features & NETIF_F_HW_VLAN_CTAG_RX))
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+#else
+	if (!(features & NETIF_F_HW_VLAN_RX))
+		features &= ~NETIF_F_HW_VLAN_TX;
+#endif
+
+	/* If Rx checksum is disabled, then LRO should also be disabled */
+	if (!(features & NETIF_F_RXCSUM))
+		features &= ~NETIF_F_LRO;
+
+	return features;
+}
+
+static int igb_set_features(struct net_device *netdev,
+			    kni_netdev_features_t features)
+{
+	u32 changed = netdev->features ^ features;
+
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+#else
+	if (changed & NETIF_F_HW_VLAN_RX)
+#endif
+		igb_vlan_mode(netdev, features);
+
+	return 0;
+}
+
+#ifdef NTF_SELF
+#ifdef USE_CONST_DEV_UC_CHAR
+static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			   struct net_device *dev,
+			   const unsigned char *addr,
+#ifdef HAVE_NDO_FDB_ADD_VID
+			   u16 vid,
+#endif
+			   u16 flags)
+#else
+static int igb_ndo_fdb_add(struct ndmsg *ndm,
+			   struct net_device *dev,
+			   unsigned char *addr,
+			   u16 flags)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	if (!(adapter->vfs_allocated_count))
+		return -EOPNOTSUPP;
+
+	/* Hardware does not support aging addresses so if a
+	 * ndm_state is given only allow permanent addresses
+	 */
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		pr_info("%s: FDB only supports static addresses\n",
+			igb_driver_name);
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
+		u32 rar_uc_entries = hw->mac.rar_entry_count -
+					(adapter->vfs_allocated_count + 1);
+
+		if (netdev_uc_count(dev) < rar_uc_entries)
+			err = dev_uc_add_excl(dev, addr);
+		else
+			err = -ENOMEM;
+	} else if (is_multicast_ether_addr(addr)) {
+		err = dev_mc_add_excl(dev, addr);
+	} else {
+		err = -EINVAL;
+	}
+
+	/* Only return duplicate errors if NLM_F_EXCL is set */
+	if (err == -EEXIST && !(flags & NLM_F_EXCL))
+		err = 0;
+
+	return err;
+}
+
+#ifndef USE_DEFAULT_FDB_DEL_DUMP
+#ifdef USE_CONST_DEV_UC_CHAR
+static int igb_ndo_fdb_del(struct ndmsg *ndm,
+			   struct net_device *dev,
+			   const unsigned char *addr)
+#else
+static int igb_ndo_fdb_del(struct ndmsg *ndm,
+			   struct net_device *dev,
+			   unsigned char *addr)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	int err = -EOPNOTSUPP;
+
+	if (ndm->ndm_state & NUD_PERMANENT) {
+		pr_info("%s: FDB only supports static addresses\n",
+			igb_driver_name);
+		return -EINVAL;
+	}
+
+	if (adapter->vfs_allocated_count) {
+		if (is_unicast_ether_addr(addr))
+			err = dev_uc_del(dev, addr);
+		else if (is_multicast_ether_addr(addr))
+			err = dev_mc_del(dev, addr);
+		else
+			err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int igb_ndo_fdb_dump(struct sk_buff *skb,
+			    struct netlink_callback *cb,
+			    struct net_device *dev,
+			    int idx)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+
+	if (adapter->vfs_allocated_count)
+		idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
+
+	return idx;
+}
+#endif /* USE_DEFAULT_FDB_DEL_DUMP */
+
+#ifdef HAVE_BRIDGE_ATTRIBS
+#ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
+static int igb_ndo_bridge_setlink(struct net_device *dev,
+				  struct nlmsghdr *nlh,
+				  u16 flags)
+#else
+static int igb_ndo_bridge_setlink(struct net_device *dev,
+				  struct nlmsghdr *nlh)
+#endif /* HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS */
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct nlattr *attr, *br_spec;
+	int rem;
+
+	if (!(adapter->vfs_allocated_count))
+		return -EOPNOTSUPP;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+	case e1000_i350:
+	case e1000_i354:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		__u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+
+		mode = nla_get_u16(attr);
+		if (mode == BRIDGE_MODE_VEPA) {
+			e1000_vmdq_set_loopback_pf(hw, 0);
+			adapter->flags &= ~IGB_FLAG_LOOPBACK_ENABLE;
+		} else if (mode == BRIDGE_MODE_VEB) {
+			e1000_vmdq_set_loopback_pf(hw, 1);
+			adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE;
+		} else
+			return -EINVAL;
+
+		netdev_info(adapter->netdev, "enabling bridge mode: %s\n",
+			    mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+	}
+
+	return 0;
+}
+
+#ifdef HAVE_BRIDGE_FILTER
+#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
+static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				  struct net_device *dev, u32 filter_mask,
+				  int nlflags)
+#else
+static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				  struct net_device *dev, u32 filter_mask)
+#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */
+#else
+static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				  struct net_device *dev)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	u16 mode;
+
+	if (!(adapter->vfs_allocated_count))
+		return -EOPNOTSUPP;
+
+	if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE)
+		mode = BRIDGE_MODE_VEB;
+	else
+		mode = BRIDGE_MODE_VEPA;
+
+#ifdef HAVE_NDO_DFLT_BRIDGE_ADD_MASK
+#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
+#ifdef HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0,
+				nlflags, filter_mask, NULL);
+#else
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags);
+#endif /* HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL */
+#else
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0);
+#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */
+#else
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode);
+#endif /* HAVE_NDO_DFLT_BRIDGE_ADD_MASK */
+}
+#endif /* HAVE_BRIDGE_ATTRIBS */
+#endif /* NTF_SELF */
+
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef HAVE_NET_DEVICE_OPS
+static const struct net_device_ops igb_netdev_ops = {
+	.ndo_open		= igb_open,
+	.ndo_stop		= igb_close,
+	.ndo_start_xmit		= igb_xmit_frame,
+	.ndo_get_stats		= igb_get_stats,
+	.ndo_set_rx_mode	= igb_set_rx_mode,
+	.ndo_set_mac_address	= igb_set_mac,
+	.ndo_change_mtu		= igb_change_mtu,
+	.ndo_do_ioctl		= igb_ioctl,
+	.ndo_tx_timeout		= igb_tx_timeout,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
+#ifdef IFLA_VF_MAX
+	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
+	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+	.ndo_get_vf_config	= igb_ndo_get_vf_config,
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
+#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */
+#endif /* IFLA_VF_MAX */
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= igb_netpoll,
+#endif
+#ifdef HAVE_NDO_SET_FEATURES
+	.ndo_fix_features	= igb_fix_features,
+	.ndo_set_features	= igb_set_features,
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+	.ndo_vlan_rx_register	= igb_vlan_mode,
+#endif
+#ifndef HAVE_RHEL6_NETDEV_OPS_EXT_FDB
+#ifdef NTF_SELF
+	.ndo_fdb_add		= igb_ndo_fdb_add,
+#ifndef USE_DEFAULT_FDB_DEL_DUMP
+	.ndo_fdb_del		= igb_ndo_fdb_del,
+	.ndo_fdb_dump		= igb_ndo_fdb_dump,
+#endif
+#endif /* ! HAVE_RHEL6_NETDEV_OPS_EXT_FDB */
+#ifdef HAVE_BRIDGE_ATTRIBS
+	.ndo_bridge_setlink	= igb_ndo_bridge_setlink,
+	.ndo_bridge_getlink	= igb_ndo_bridge_getlink,
+#endif /* HAVE_BRIDGE_ATTRIBS */
+#endif
+};
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+static const struct net_device_ops igb_vmdq_ops = {
+	.ndo_open		= &igb_vmdq_open,
+	.ndo_stop		= &igb_vmdq_close,
+	.ndo_start_xmit		= &igb_vmdq_xmit_frame,
+	.ndo_get_stats		= &igb_vmdq_get_stats,
+	.ndo_set_rx_mode	= &igb_vmdq_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= &igb_vmdq_set_mac,
+	.ndo_change_mtu		= &igb_vmdq_change_mtu,
+	.ndo_tx_timeout		= &igb_vmdq_tx_timeout,
+	.ndo_vlan_rx_register	= &igb_vmdq_vlan_rx_register,
+	.ndo_vlan_rx_add_vid	= &igb_vmdq_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= &igb_vmdq_vlan_rx_kill_vid,
+};
+
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+#endif /* HAVE_NET_DEVICE_OPS */
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev)
+{
+#ifdef HAVE_NET_DEVICE_OPS
+	vnetdev->netdev_ops = &igb_vmdq_ops;
+#else
+	dev->open = &igb_vmdq_open;
+	dev->stop = &igb_vmdq_close;
+	dev->hard_start_xmit = &igb_vmdq_xmit_frame;
+	dev->get_stats = &igb_vmdq_get_stats;
+#ifdef HAVE_SET_RX_MODE
+	dev->set_rx_mode = &igb_vmdq_set_rx_mode;
+#endif
+	dev->set_multicast_list = &igb_vmdq_set_rx_mode;
+	dev->set_mac_address = &igb_vmdq_set_mac;
+	dev->change_mtu = &igb_vmdq_change_mtu;
+#ifdef HAVE_TX_TIMEOUT
+	dev->tx_timeout = &igb_vmdq_tx_timeout;
+#endif
+#if defined(NETIF_F_HW_VLAN_TX) || defined(NETIF_F_HW_VLAN_CTAG_TX)
+	dev->vlan_rx_register = &igb_vmdq_vlan_rx_register;
+	dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid;
+	dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid;
+#endif
+#endif
+	igb_vmdq_set_ethtool_ops(vnetdev);
+	vnetdev->watchdog_timeo = 5 * HZ;
+
+}
+
+int igb_init_vmdq_netdevs(struct igb_adapter *adapter)
+{
+	int pool, err = 0, base_queue;
+	struct net_device *vnetdev;
+	struct igb_vmdq_adapter *vmdq_adapter;
+
+	for (pool = 1; pool < adapter->vmdq_pools; pool++) {
+		int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues);
+		base_queue = pool * qpp;
+		vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter));
+		if (!vnetdev) {
+			err = -ENOMEM;
+			break;
+		}
+		vmdq_adapter = netdev_priv(vnetdev);
+		vmdq_adapter->vnetdev = vnetdev;
+		vmdq_adapter->real_adapter = adapter;
+		vmdq_adapter->rx_ring = adapter->rx_ring[base_queue];
+		vmdq_adapter->tx_ring = adapter->tx_ring[base_queue];
+		igb_assign_vmdq_netdev_ops(vnetdev);
+		snprintf(vnetdev->name, IFNAMSIZ, "%sv%d",
+			 adapter->netdev->name, pool);
+		vnetdev->features = adapter->netdev->features;
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+		vnetdev->vlan_features = adapter->netdev->vlan_features;
+#endif
+		adapter->vmdq_netdev[pool-1] = vnetdev;
+		err = register_netdev(vnetdev);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+int igb_remove_vmdq_netdevs(struct igb_adapter *adapter)
+{
+	int pool, err = 0;
+
+	for (pool = 1; pool < adapter->vmdq_pools; pool++) {
+		unregister_netdev(adapter->vmdq_netdev[pool-1]);
+		free_netdev(adapter->vmdq_netdev[pool-1]);
+		adapter->vmdq_netdev[pool-1] = NULL;
+	}
+	return err;
+}
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+
+/**
+ * igb_set_fw_version - Configure version string for ethtool
+ * @adapter: adapter struct
+ *
+ **/
+static void igb_set_fw_version(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_fw_version fw;
+
+	e1000_get_fw_version(hw, &fw);
+
+	switch (hw->mac.type) {
+	case e1000_i210:
+	case e1000_i211:
+		if (!(e1000_get_flash_presence_i210(hw))) {
+			snprintf(adapter->fw_version,
+			    sizeof(adapter->fw_version),
+			    "%2d.%2d-%d",
+			    fw.invm_major, fw.invm_minor, fw.invm_img_type);
+			break;
+		}
+		/* fall through */
+	default:
+		/* if option rom is valid, display its version too*/
+		if (fw.or_valid) {
+			snprintf(adapter->fw_version,
+			    sizeof(adapter->fw_version),
+			    "%d.%d, 0x%08x, %d.%d.%d",
+			    fw.eep_major, fw.eep_minor, fw.etrack_id,
+			    fw.or_major, fw.or_build, fw.or_patch);
+		/* no option rom */
+		} else {
+			if (fw.etrack_id != 0X0000) {
+			snprintf(adapter->fw_version,
+			    sizeof(adapter->fw_version),
+			    "%d.%d, 0x%08x",
+			    fw.eep_major, fw.eep_minor, fw.etrack_id);
+			} else {
+			snprintf(adapter->fw_version,
+			    sizeof(adapter->fw_version),
+			    "%d.%d.%d",
+			    fw.eep_major, fw.eep_minor, fw.eep_build);
+			}
+		}
+		break;
+	}
+
+	return;
+}
+
+/**
+ * igb_init_mas - init Media Autosense feature if enabled in the NVM
+ *
+ * @adapter: adapter struct
+ **/
+static void igb_init_mas(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 eeprom_data;
+
+	e1000_read_nvm(hw, NVM_COMPAT, 1, &eeprom_data);
+	switch (hw->bus.func) {
+	case E1000_FUNC_0:
+		if (eeprom_data & IGB_MAS_ENABLE_0)
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+		break;
+	case E1000_FUNC_1:
+		if (eeprom_data & IGB_MAS_ENABLE_1)
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+		break;
+	case E1000_FUNC_2:
+		if (eeprom_data & IGB_MAS_ENABLE_2)
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+		break;
+	case E1000_FUNC_3:
+		if (eeprom_data & IGB_MAS_ENABLE_3)
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+		break;
+	default:
+		/* Shouldn't get here */
+		dev_err(pci_dev_to_dev(adapter->pdev),
+			"%s:AMS: Invalid port configuration, returning\n",
+			adapter->netdev->name);
+		break;
+	}
+}
+
+/**
+ * igb_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igb_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igb_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int __devinit igb_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct igb_adapter *adapter;
+	struct e1000_hw *hw;
+	u16 eeprom_data = 0;
+	u8 pba_str[E1000_PBANUM_LENGTH];
+	s32 ret_val;
+	static int global_quad_port_a; /* global quad port a indication */
+	int i, err, pci_using_dac;
+	static int cards_found;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	pci_using_dac = 0;
+	err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+	if (!err) {
+		err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+		if (!err)
+			pci_using_dac = 1;
+	} else {
+		err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+		if (err) {
+			err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+			if (err) {
+				IGB_ERR("No usable DMA configuration, "
+				        "aborting\n");
+				goto err_dma;
+			}
+		}
+	}
+
+#ifndef HAVE_ASPM_QUIRKS
+	/* 82575 requires that the pci-e link partner disable the L0s state */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82575EB_COPPER:
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+	default:
+		break;
+	}
+
+#endif /* HAVE_ASPM_QUIRKS */
+	err = pci_request_selected_regions(pdev,
+	                                   pci_select_bars(pdev,
+                                                           IORESOURCE_MEM),
+	                                   igb_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_enable_pcie_error_reporting(pdev);
+
+	pci_set_master(pdev);
+
+	err = -ENOMEM;
+#ifdef HAVE_TX_MQ
+	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
+	                           IGB_MAX_TX_QUEUES);
+#else
+	netdev = alloc_etherdev(sizeof(struct igb_adapter));
+#endif /* HAVE_TX_MQ */
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_MODULE_OWNER(netdev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->port_num = hw->bus.func;
+	adapter->msg_enable = (1 << debug) - 1;
+
+#ifdef HAVE_PCI_ERS
+	err = pci_save_state(pdev);
+	if (err)
+		goto err_ioremap;
+#endif
+	err = -EIO;
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+	                      pci_resource_len(pdev, 0));
+	if (!hw->hw_addr)
+		goto err_ioremap;
+
+#ifdef HAVE_NET_DEVICE_OPS
+	netdev->netdev_ops = &igb_netdev_ops;
+#else /* HAVE_NET_DEVICE_OPS */
+	netdev->open = &igb_open;
+	netdev->stop = &igb_close;
+	netdev->get_stats = &igb_get_stats;
+#ifdef HAVE_SET_RX_MODE
+	netdev->set_rx_mode = &igb_set_rx_mode;
+#endif
+	netdev->set_multicast_list = &igb_set_rx_mode;
+	netdev->set_mac_address = &igb_set_mac;
+	netdev->change_mtu = &igb_change_mtu;
+	netdev->do_ioctl = &igb_ioctl;
+#ifdef HAVE_TX_TIMEOUT
+	netdev->tx_timeout = &igb_tx_timeout;
+#endif
+	netdev->vlan_rx_register = igb_vlan_mode;
+	netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
+	netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	netdev->poll_controller = igb_netpoll;
+#endif
+	netdev->hard_start_xmit = &igb_xmit_frame;
+#endif /* HAVE_NET_DEVICE_OPS */
+	igb_set_ethtool_ops(netdev);
+#ifdef HAVE_TX_TIMEOUT
+	netdev->watchdog_timeo = 5 * HZ;
+#endif
+
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+	adapter->bd_number = cards_found;
+
+	/* setup the private structure */
+	err = igb_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	e1000_get_bus_info(hw);
+
+	hw->phy.autoneg_wait_to_complete = FALSE;
+	hw->mac.adaptive_ifs = FALSE;
+
+	/* Copper options */
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		hw->phy.mdix = AUTO_ALL_MODES;
+		hw->phy.disable_polarity_correction = FALSE;
+		hw->phy.ms_type = e1000_ms_hw_default;
+	}
+
+	if (e1000_check_reset_block(hw))
+		dev_info(pci_dev_to_dev(pdev),
+			"PHY reset is blocked due to SOL/IDER session.\n");
+
+	/*
+	 * features is initialized to 0 in allocation, it might have bits
+	 * set by igb_sw_init so we should use an or instead of an
+	 * assignment.
+	 */
+	netdev->features |= NETIF_F_SG |
+			    NETIF_F_IP_CSUM |
+#ifdef NETIF_F_IPV6_CSUM
+			    NETIF_F_IPV6_CSUM |
+#endif
+#ifdef NETIF_F_TSO
+			    NETIF_F_TSO |
+#ifdef NETIF_F_TSO6
+			    NETIF_F_TSO6 |
+#endif
+#endif /* NETIF_F_TSO */
+#ifdef NETIF_F_RXHASH
+			    NETIF_F_RXHASH |
+#endif
+			    NETIF_F_RXCSUM |
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX;
+#else
+			    NETIF_F_HW_VLAN_RX |
+			    NETIF_F_HW_VLAN_TX;
+#endif
+
+	if (hw->mac.type >= e1000_82576)
+		netdev->features |= NETIF_F_SCTP_CSUM;
+
+#ifdef HAVE_NDO_SET_FEATURES
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features;
+#ifndef IGB_NO_LRO
+
+	/* give us the option of enabling LRO later */
+	netdev->hw_features |= NETIF_F_LRO;
+#endif
+#else
+#ifdef NETIF_F_GRO
+
+	/* this is only needed on kernels prior to 2.6.39 */
+	netdev->features |= NETIF_F_GRO;
+#endif
+#endif
+
+	/* set this bit last since it cannot be part of hw_features */
+#ifdef NETIF_F_HW_VLAN_CTAG_FILTER
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+#else
+	netdev->features |= NETIF_F_HW_VLAN_FILTER;
+#endif
+
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+	netdev->vlan_features |= NETIF_F_TSO |
+				 NETIF_F_TSO6 |
+				 NETIF_F_IP_CSUM |
+				 NETIF_F_IPV6_CSUM |
+				 NETIF_F_SG;
+
+#endif
+	if (pci_using_dac)
+		netdev->features |= NETIF_F_HIGHDMA;
+
+	adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
+#ifdef DEBUG
+	if (adapter->dmac != IGB_DMAC_DISABLE)
+		printk("%s: DMA Coalescing is enabled..\n", netdev->name);
+#endif
+
+	/* before reading the NVM, reset the controller to put the device in a
+	 * known good starting state */
+	e1000_reset_hw(hw);
+
+	/* make sure the NVM is good */
+	if (e1000_validate_nvm_checksum(hw) < 0) {
+		dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
+		        " Valid\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	/* copy the MAC address out of the NVM */
+	if (e1000_read_mac_addr(hw))
+		dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
+	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+#ifdef ETHTOOL_GPERMADDR
+	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
+
+	if (!is_valid_ether_addr(netdev->perm_addr)) {
+#else
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+#endif
+		dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
+	adapter->mac_table[0].queue = adapter->vfs_allocated_count;
+	adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
+	igb_rar_set(adapter, 0);
+
+	/* get firmware version for ethtool -i */
+	igb_set_fw_version(adapter);
+
+	/* Check if Media Autosense is enabled */
+	if (hw->mac.type == e1000_82580)
+		igb_init_mas(adapter);
+	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
+	            (unsigned long) adapter);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
+			    (unsigned long) adapter);
+	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
+	            (unsigned long) adapter);
+
+	INIT_WORK(&adapter->reset_task, igb_reset_task);
+	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
+	hw->phy.autoneg_advertised = 0x2f;
+
+	hw->fc.requested_mode = e1000_fc_default;
+	hw->fc.current_mode = e1000_fc_default;
+
+	e1000_validate_mdi_setting(hw);
+
+	/* By default, support wake on port A */
+	if (hw->bus.func == 0)
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+	/* Check the NVM for wake support for non-port A ports */
+	if (hw->mac.type >= e1000_82580)
+		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+		                 &eeprom_data);
+	else if (hw->bus.func == 1)
+		e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
+
+	if (eeprom_data & IGB_EEPROM_APME)
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+	/* now that we have the eeprom settings, apply the special cases where
+	 * the eeprom may be wrong or the board simply won't support wake on
+	 * lan on a particular port */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82576_FIBER:
+	case E1000_DEV_ID_82576_SERDES:
+		/* Wake events only supported on port A for dual fiber
+		 * regardless of eeprom setting */
+		if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	case E1000_DEV_ID_82576_QUAD_COPPER:
+	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+		/* if quad port adapter, disable WoL on all but port A */
+		if (global_quad_port_a != 0)
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		else
+			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+		/* Reset for multiple quad port adapters */
+		if (++global_quad_port_a == 4)
+			global_quad_port_a = 0;
+		break;
+	default:
+		/* If the device can't wake, don't set software support */
+		if (!device_can_wakeup(&adapter->pdev->dev))
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	}
+
+	/* initialize the wol settings based on the eeprom settings */
+	if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
+		adapter->wol |= E1000_WUFC_MAG;
+
+	/* Some vendors want WoL disabled by default, but still supported */
+	if ((hw->mac.type == e1000_i350) &&
+	    (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+		adapter->wol = 0;
+	}
+
+	device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev),
+				 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
+
+	/* reset the hardware with the new settings */
+	igb_reset(adapter);
+	adapter->devrc = 0;
+
+#ifdef HAVE_I2C_SUPPORT
+	/* Init the I2C interface */
+	err = igb_init_i2c(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "failed to init i2c interface\n");
+		goto err_eeprom;
+	}
+#endif /* HAVE_I2C_SUPPORT */
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver. */
+	igb_get_hw_control(adapter);
+
+	strncpy(netdev->name, "eth%d", IFNAMSIZ);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	err = igb_init_vmdq_netdevs(adapter);
+	if (err)
+		goto err_register;
+#endif
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+#ifdef IGB_DCA
+	if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
+		adapter->flags |= IGB_FLAG_DCA_ENABLED;
+		dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
+		igb_setup_dca(adapter);
+	}
+
+#endif
+#ifdef HAVE_PTP_1588_CLOCK
+	/* do hw tstamp init after resetting */
+	igb_ptp_init(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
+	/* print bus type/speed/width info */
+	dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
+	         netdev->name,
+	         ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
+	          (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
+		  (hw->mac.type == e1000_i354) ? "integrated" :
+	                                                    "unknown"),
+	         ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+	          (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+	          (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+		  (hw->mac.type == e1000_i354) ? "integrated" :
+	           "unknown"));
+	dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
+	for (i = 0; i < 6; i++)
+		printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
+
+	ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
+	if (ret_val)
+		strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
+	dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
+		 pba_str);
+
+
+	/* Initialize the thermal sensor on i350 devices. */
+	if (hw->mac.type == e1000_i350) {
+		if (hw->bus.func == 0) {
+			u16 ets_word;
+
+			/*
+			 * Read the NVM to determine if this i350 device
+			 * supports an external thermal sensor.
+			 */
+			e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
+			if (ets_word != 0x0000 && ets_word != 0xFFFF)
+				adapter->ets = true;
+			else
+				adapter->ets = false;
+		}
+#ifdef IGB_HWMON
+
+		igb_sysfs_init(adapter);
+#else
+#ifdef IGB_PROCFS
+
+		igb_procfs_init(adapter);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+	} else {
+		adapter->ets = false;
+	}
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		switch (hw->mac.type) {
+		case e1000_i350:
+		case e1000_i210:
+		case e1000_i211:
+			/* Enable EEE for internal copper PHY devices */
+			err = e1000_set_eee_i350(hw);
+			if ((!err) &&
+			    (adapter->flags & IGB_FLAG_EEE))
+				adapter->eee_advert =
+					MDIO_EEE_100TX | MDIO_EEE_1000T;
+			break;
+		case e1000_i354:
+			if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) &
+			    (E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+				err = e1000_set_eee_i354(hw);
+				if ((!err) &&
+				    (adapter->flags & IGB_FLAG_EEE))
+					adapter->eee_advert =
+					   MDIO_EEE_100TX | MDIO_EEE_1000T;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* send driver version info to firmware */
+	if (hw->mac.type >= e1000_i350)
+		igb_init_fw(adapter);
+
+#ifndef IGB_NO_LRO
+	if (netdev->features & NETIF_F_LRO)
+		dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n");
+	else
+		dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n");
+#endif
+	dev_info(pci_dev_to_dev(pdev),
+	         "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
+	         adapter->msix_entries ? "MSI-X" :
+	         (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
+	         adapter->num_rx_queues, adapter->num_tx_queues);
+
+	cards_found++;
+
+	pm_runtime_put_noidle(&pdev->dev);
+	return 0;
+
+err_register:
+	igb_release_hw_control(adapter);
+#ifdef HAVE_I2C_SUPPORT
+	memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
+#endif /* HAVE_I2C_SUPPORT */
+err_eeprom:
+	if (!e1000_check_reset_block(hw))
+		e1000_phy_hw_reset(hw);
+
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+err_sw_init:
+	igb_clear_interrupt_scheme(adapter);
+	igb_reset_sriov_capability(adapter);
+	iounmap(hw->hw_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_selected_regions(pdev,
+	                             pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+#ifdef HAVE_I2C_SUPPORT
+/*
+ *  igb_remove_i2c - Cleanup  I2C interface
+ *  @adapter: pointer to adapter structure
+ *
+ */
+static void igb_remove_i2c(struct igb_adapter *adapter)
+{
+
+	/* free the adapter bus structure */
+	i2c_del_adapter(&adapter->i2c_adap);
+}
+#endif /* HAVE_I2C_SUPPORT */
+
+/**
+ * igb_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igb_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void __devexit igb_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	pm_runtime_get_noresume(&pdev->dev);
+#ifdef HAVE_I2C_SUPPORT
+	igb_remove_i2c(adapter);
+#endif /* HAVE_I2C_SUPPORT */
+#ifdef HAVE_PTP_1588_CLOCK
+	igb_ptp_stop(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	/* flush_scheduled work may reschedule our watchdog task, so
+	 * explicitly disable watchdog tasks from being rescheduled  */
+	set_bit(__IGB_DOWN, &adapter->state);
+	del_timer_sync(&adapter->watchdog_timer);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		del_timer_sync(&adapter->dma_err_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	flush_scheduled_work();
+
+#ifdef IGB_DCA
+	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
+		dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
+		dca_remove_requester(&pdev->dev);
+		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
+		E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
+	}
+#endif
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant. */
+	igb_release_hw_control(adapter);
+
+	unregister_netdev(netdev);
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	igb_remove_vmdq_netdevs(adapter);
+#endif
+
+	igb_clear_interrupt_scheme(adapter);
+	igb_reset_sriov_capability(adapter);
+
+	iounmap(hw->hw_addr);
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+	pci_release_selected_regions(pdev,
+	                             pci_select_bars(pdev, IORESOURCE_MEM));
+
+#ifdef IGB_HWMON
+	igb_sysfs_exit(adapter);
+#else
+#ifdef IGB_PROCFS
+	igb_procfs_exit(adapter);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+	kfree(adapter->mac_table);
+	kfree(adapter->shadow_vfta);
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	pci_disable_device(pdev);
+}
+
+/**
+ * igb_sw_init - Initialize general software structures (struct igb_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igb_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int igb_sw_init(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* PCI config space info */
+
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+
+	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IGB_DEFAULT_TXD;
+	adapter->rx_ring_count = IGB_DEFAULT_RXD;
+
+	/* set default work limits */
+	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
+
+	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+					      VLAN_HLEN;
+
+	/* Initialize the hardware-specific values */
+	if (e1000_setup_init_funcs(hw, TRUE)) {
+		dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n");
+		return -EIO;
+	}
+
+	adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) *
+				     hw->mac.rar_entry_count,
+				     GFP_ATOMIC);
+
+	/* Setup and initialize a copy of the hw vlan table array */
+	adapter->shadow_vfta = kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES,
+				       GFP_ATOMIC);
+#ifdef NO_KNI
+	/* These calls may decrease the number of queues */
+	if (hw->mac.type < e1000_i210) {
+		igb_set_sriov_capability(adapter);
+	}
+
+	if (igb_init_interrupt_scheme(adapter, true)) {
+		dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	igb_irq_disable(adapter);
+
+	set_bit(__IGB_DOWN, &adapter->state);
+#endif
+	return 0;
+}
+
+/**
+ * igb_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+static int __igb_open(struct net_device *netdev, bool resuming)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+#ifdef CONFIG_PM_RUNTIME
+	struct pci_dev *pdev = adapter->pdev;
+#endif /* CONFIG_PM_RUNTIME */
+	int err;
+	int i;
+
+	/* disallow open during test */
+	if (test_bit(__IGB_TESTING, &adapter->state)) {
+		WARN_ON(resuming);
+		return -EBUSY;
+	}
+
+#ifdef CONFIG_PM_RUNTIME
+	if (!resuming)
+		pm_runtime_get_sync(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = igb_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igb_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	igb_power_up_link(adapter);
+
+	/* before we allocate an interrupt, we must be ready to handle it.
+	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+	 * as soon as we call pci_request_irq, so we have to setup our
+	 * clean_rx handler before we do so.  */
+	igb_configure(adapter);
+
+	err = igb_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	netif_set_real_num_tx_queues(netdev,
+				     adapter->vmdq_pools ? 1 :
+				     adapter->num_tx_queues);
+
+	err = netif_set_real_num_rx_queues(netdev,
+					   adapter->vmdq_pools ? 1 :
+					   adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	/* From here on the code is the same as igb_up() */
+	clear_bit(__IGB_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&(adapter->q_vector[i]->napi));
+	igb_configure_lli(adapter);
+
+	/* Clear any pending interrupts. */
+	E1000_READ_REG(hw, E1000_ICR);
+
+	igb_irq_enable(adapter);
+
+	/* notify VFs that reset has been completed */
+	if (adapter->vfs_allocated_count) {
+		u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
+		reg_data |= E1000_CTRL_EXT_PFRSTD;
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
+	}
+
+	netif_tx_start_all_queues(netdev);
+
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		schedule_work(&adapter->dma_err_task);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = 1;
+	schedule_work(&adapter->watchdog_task);
+
+	return E1000_SUCCESS;
+
+err_set_queues:
+	igb_free_irq(adapter);
+err_req_irq:
+	igb_release_hw_control(adapter);
+	igb_power_down_link(adapter);
+	igb_free_all_rx_resources(adapter);
+err_setup_rx:
+	igb_free_all_tx_resources(adapter);
+err_setup_tx:
+	igb_reset(adapter);
+
+#ifdef CONFIG_PM_RUNTIME
+	if (!resuming)
+		pm_runtime_put(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+	return err;
+}
+
+static int igb_open(struct net_device *netdev)
+{
+	return __igb_open(netdev, false);
+}
+
+/**
+ * igb_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+static int __igb_close(struct net_device *netdev, bool suspending)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+#ifdef CONFIG_PM_RUNTIME
+	struct pci_dev *pdev = adapter->pdev;
+#endif /* CONFIG_PM_RUNTIME */
+
+	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
+
+#ifdef CONFIG_PM_RUNTIME
+	if (!suspending)
+		pm_runtime_get_sync(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+	igb_down(adapter);
+
+	igb_release_hw_control(adapter);
+
+	igb_free_irq(adapter);
+
+	igb_free_all_tx_resources(adapter);
+	igb_free_all_rx_resources(adapter);
+
+#ifdef CONFIG_PM_RUNTIME
+	if (!suspending)
+		pm_runtime_put_sync(&pdev->dev);
+#endif /* CONFIG_PM_RUNTIME */
+
+	return 0;
+}
+
+static int igb_close(struct net_device *netdev)
+{
+	return __igb_close(netdev, false);
+}
+
+/**
+ * igb_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int igb_setup_tx_resources(struct igb_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int size;
+
+	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
+	tx_ring->tx_buffer_info = vzalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+
+	if (!tx_ring->desc)
+		goto err;
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	dev_err(dev,
+		"Unable to allocate memory for the transmit descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igb_setup_all_tx_resources - wrapper to allocate Tx resources
+ *				  (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = igb_setup_tx_resources(adapter->tx_ring[i]);
+		if (err) {
+			dev_err(pci_dev_to_dev(pdev),
+				"Allocation for Tx Queue %u failed\n", i);
+			for (i--; i >= 0; i--)
+				igb_free_tx_resources(adapter->tx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * igb_setup_tctl - configure the transmit control registers
+ * @adapter: Board private structure
+ **/
+void igb_setup_tctl(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tctl;
+
+	/* disable queue 0 which is enabled by default on 82575 and 82576 */
+	E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0);
+
+	/* Program the Transmit Control Register */
+	tctl = E1000_READ_REG(hw, E1000_TCTL);
+	tctl &= ~E1000_TCTL_CT;
+	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
+		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+	e1000_config_collision_dist(hw);
+
+	/* Enable transmits */
+	tctl |= E1000_TCTL_EN;
+
+	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
+}
+
+static u32 igb_tx_wthresh(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	switch (hw->mac.type) {
+	case e1000_i354:
+		return 4;
+	case e1000_82576:
+		if (adapter->msix_entries)
+			return 1;
+	default:
+		break;
+	}
+
+	return 16;
+}
+
+/**
+ * igb_configure_tx_ring - Configure transmit ring after Reset
+ * @adapter: board private structure
+ * @ring: tx ring to configure
+ *
+ * Configure a transmit ring after a reset.
+ **/
+void igb_configure_tx_ring(struct igb_adapter *adapter,
+                           struct igb_ring *ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 txdctl = 0;
+	u64 tdba = ring->dma;
+	int reg_idx = ring->reg_idx;
+
+	/* disable the queue */
+	E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0);
+	E1000_WRITE_FLUSH(hw);
+	mdelay(10);
+
+	E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx),
+	                ring->count * sizeof(union e1000_adv_tx_desc));
+	E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx),
+	                tdba & 0x00000000ffffffffULL);
+	E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32);
+
+	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
+	E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	txdctl |= IGB_TX_PTHRESH;
+	txdctl |= IGB_TX_HTHRESH << 8;
+	txdctl |= igb_tx_wthresh(adapter) << 16;
+
+	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
+	E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ * igb_configure_tx - Configure transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void igb_configure_tx(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
+}
+
+/**
+ * igb_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int igb_setup_rx_resources(struct igb_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int size, desc_len;
+
+	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
+	rx_ring->rx_buffer_info = vzalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	desc_len = sizeof(union e1000_adv_rx_desc);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * desc_len;
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+
+	if (!rx_ring->desc)
+		goto err;
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	return 0;
+
+err:
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the receive descriptor"
+		" ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igb_setup_all_rx_resources - wrapper to allocate Rx resources
+ *				  (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = igb_setup_rx_resources(adapter->rx_ring[i]);
+		if (err) {
+			dev_err(pci_dev_to_dev(pdev),
+				"Allocation for Rx Queue %u failed\n", i);
+			for (i--; i >= 0; i--)
+				igb_free_rx_resources(adapter->rx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * igb_setup_mrqc - configure the multiple receive queue control registers
+ * @adapter: Board private structure
+ **/
+static void igb_setup_mrqc(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mrqc, rxcsum;
+	u32 j, num_rx_queues, shift = 0, shift2 = 0;
+	static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741,
+					0xB08FA343, 0xCB2BCAD0, 0xB4307BAE,
+					0xA32DCB77, 0x0CF23080, 0x3BB7426A,
+					0xFA01ACBE };
+
+	/* Fill out hash function seeds */
+	for (j = 0; j < 10; j++)
+		E1000_WRITE_REG(hw, E1000_RSSRK(j), rsskey[j]);
+
+	num_rx_queues = adapter->rss_queues;
+
+	/* 82575 and 82576 supports 2 RSS queues for VMDq */
+	switch (hw->mac.type) {
+	case e1000_82575:
+		if (adapter->vmdq_pools) {
+			shift = 2;
+			shift2 = 6;
+			break;
+		}
+		shift = 6;
+		break;
+	case e1000_82576:
+		/* 82576 supports 2 RSS queues for SR-IOV */
+		if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
+			shift = 3;
+			num_rx_queues = 2;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Populate the redirection table 4 entries at a time.  To do this
+	 * we are generating the results for n and n+2 and then interleaving
+	 * those with the results with n+1 and n+3.
+	 */
+	for (j = 0; j < 32; j++) {
+		/* first pass generates n and n+2 */
+		u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues;
+		u32 reta = (base & 0x07800780) >> (7 - shift);
+
+		/* second pass generates n+1 and n+3 */
+		base += 0x00010001 * num_rx_queues;
+		reta |= (base & 0x07800780) << (1 + shift);
+
+		/* generate 2nd table for 82575 based parts */
+		if (shift2)
+			reta |= (0x01010101 * num_rx_queues) << shift2;
+
+		E1000_WRITE_REG(hw, E1000_RETA(j), reta);
+	}
+
+	/*
+	 * Disable raw packet checksumming so that RSS hash is placed in
+	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
+	 * offloads as they are enabled by default
+	 */
+	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
+	rxcsum |= E1000_RXCSUM_PCSD;
+
+	if (adapter->hw.mac.type >= e1000_82576)
+		/* Enable Receive Checksum Offload for SCTP */
+		rxcsum |= E1000_RXCSUM_CRCOFL;
+
+	/* Don't need to set TUOFL or IPOFL, they default to 1 */
+	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
+
+	/* Generate RSS hash based on packet types, TCP/UDP
+	 * port numbers and/or IPv4/v6 src and dst addresses
+	 */
+	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
+	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
+	       E1000_MRQC_RSS_FIELD_IPV6 |
+	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
+	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
+
+	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
+	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
+
+	/* If VMDq is enabled then we set the appropriate mode for that, else
+	 * we default to RSS so that an RSS hash is calculated per packet even
+	 * if we are only using one queue */
+	if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
+		if (hw->mac.type > e1000_82575) {
+			/* Set the default pool for the PF's first queue */
+			u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL);
+			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
+				   E1000_VT_CTL_DISABLE_DEF_POOL);
+			vtctl |= adapter->vfs_allocated_count <<
+				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
+			E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl);
+		} else if (adapter->rss_queues > 1) {
+			/* set default queue for pool 1 to queue 2 */
+			E1000_WRITE_REG(hw, E1000_VT_CTL,
+				        adapter->rss_queues << 7);
+		}
+		if (adapter->rss_queues > 1)
+			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
+		else
+			mrqc |= E1000_MRQC_ENABLE_VMDQ;
+	} else {
+		mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
+	}
+	igb_vmm_control(adapter);
+
+	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
+}
+
+/**
+ * igb_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ **/
+void igb_setup_rctl(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+
+	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+
+	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
+		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+	/*
+	 * enable stripping of CRC. It's unlikely this will break BMC
+	 * redirection as it did with e1000. Newer features require
+	 * that the HW strips the CRC.
+	 */
+	rctl |= E1000_RCTL_SECRC;
+
+	/* disable store bad packets and clear size bits. */
+	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
+
+	/* enable LPE to prevent packets larger than max_frame_size */
+	rctl |= E1000_RCTL_LPE;
+
+	/* disable queue 0 to prevent tail write w/o re-config */
+	E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0);
+
+	/* Attention!!!  For SR-IOV PF driver operations you must enable
+	 * queue drop for all VF and PF queues to prevent head of line blocking
+	 * if an un-trusted VF does not provide descriptors to hardware.
+	 */
+	if (adapter->vfs_allocated_count) {
+		/* set all queue drop enable bits */
+		E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES);
+	}
+
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+}
+
+static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
+                                   int vfn)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr;
+
+	/* if it isn't the PF check to see if VFs are enabled and
+	 * increase the size to support vlan tags */
+	if (vfn < adapter->vfs_allocated_count &&
+	    adapter->vf_data[vfn].vlans_enabled)
+		size += VLAN_HLEN;
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	if (vfn >= adapter->vfs_allocated_count) {
+		int queue = vfn - adapter->vfs_allocated_count;
+		struct igb_vmdq_adapter *vadapter;
+
+		vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]);
+		if (vadapter->vlgrp)
+			size += VLAN_HLEN;
+	}
+#endif
+	vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
+	vmolr &= ~E1000_VMOLR_RLPML_MASK;
+	vmolr |= size | E1000_VMOLR_LPE;
+	E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
+
+	return 0;
+}
+
+/**
+ * igb_rlpml_set - set maximum receive packet size
+ * @adapter: board private structure
+ *
+ * Configure maximum receivable packet size.
+ **/
+static void igb_rlpml_set(struct igb_adapter *adapter)
+{
+	u32 max_frame_size = adapter->max_frame_size;
+	struct e1000_hw *hw = &adapter->hw;
+	u16 pf_id = adapter->vfs_allocated_count;
+
+	if (adapter->vmdq_pools && hw->mac.type != e1000_82575) {
+		int i;
+		for (i = 0; i < adapter->vmdq_pools; i++)
+			igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i);
+		/*
+		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
+		 * to our max jumbo frame size, in case we need to enable
+		 * jumbo frames on one of the rings later.
+		 * This will not pass over-length frames into the default
+		 * queue because it's gated by the VMOLR.RLPML.
+		 */
+		max_frame_size = MAX_JUMBO_FRAME_SIZE;
+	}
+	/* Set VF RLPML for the PF device. */
+	if (adapter->vfs_allocated_count)
+		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
+
+	E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size);
+}
+
+static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
+					int vfn, bool enable)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 val;
+	void __iomem *reg;
+
+	if (hw->mac.type < e1000_82576)
+		return;
+
+	if (hw->mac.type == e1000_i350)
+		reg = hw->hw_addr + E1000_DVMOLR(vfn);
+	else
+		reg = hw->hw_addr + E1000_VMOLR(vfn);
+
+	val = readl(reg);
+	if (enable)
+		val |= E1000_VMOLR_STRVLAN;
+	else
+		val &= ~(E1000_VMOLR_STRVLAN);
+	writel(val, reg);
+}
+static inline void igb_set_vmolr(struct igb_adapter *adapter,
+				 int vfn, bool aupe)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr;
+
+	/*
+	 * This register exists only on 82576 and newer so if we are older then
+	 * we should exit and do nothing
+	 */
+	if (hw->mac.type < e1000_82576)
+		return;
+
+	vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
+
+	if (aupe)
+		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
+	else
+		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
+
+	/* clear all bits that might not be set */
+	vmolr &= ~E1000_VMOLR_RSSE;
+
+	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
+		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
+
+	vmolr |= E1000_VMOLR_BAM;	   /* Accept broadcast */
+	vmolr |= E1000_VMOLR_LPE;	   /* Accept long packets */
+
+	E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
+}
+
+/**
+ * igb_configure_rx_ring - Configure a receive ring after Reset
+ * @adapter: board private structure
+ * @ring: receive ring to be configured
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+void igb_configure_rx_ring(struct igb_adapter *adapter,
+                           struct igb_ring *ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u64 rdba = ring->dma;
+	int reg_idx = ring->reg_idx;
+	u32 srrctl = 0, rxdctl = 0;
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	/*
+	 * RLPML prevents us from receiving a frame larger than max_frame so
+	 * it is safe to just set the rx_buffer_len to max_frame without the
+	 * risk of an skb over panic.
+	 */
+	ring->rx_buffer_len = max_t(u32, adapter->max_frame_size,
+				    MAXIMUM_ETHERNET_VLAN_SIZE);
+
+#endif
+	/* disable the queue */
+	E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0);
+
+	/* Set DMA base address registers */
+	E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx),
+	                rdba & 0x00000000ffffffffULL);
+	E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32);
+	E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx),
+	               ring->count * sizeof(union e1000_adv_rx_desc));
+
+	/* initialize head and tail */
+	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
+	E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	/* reset next-to- use/clean to place SW in sync with hardwdare */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	ring->next_to_alloc = 0;
+
+#endif
+	/* set descriptor configuration */
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+	srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+	srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
+	         E1000_SRRCTL_BSIZEPKT_SHIFT;
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+#ifdef HAVE_PTP_1588_CLOCK
+	if (hw->mac.type >= e1000_82580)
+		srrctl |= E1000_SRRCTL_TIMESTAMP;
+#endif /* HAVE_PTP_1588_CLOCK */
+	/*
+	 * We should set the drop enable bit if:
+	 *  SR-IOV is enabled
+	 *   or
+	 *  Flow Control is disabled and number of RX queues > 1
+	 *
+	 *  This allows us to avoid head of line blocking for security
+	 *  and performance reasons.
+	 */
+	if (adapter->vfs_allocated_count ||
+	    (adapter->num_rx_queues > 1 &&
+	     (hw->fc.requested_mode == e1000_fc_none ||
+	      hw->fc.requested_mode == e1000_fc_rx_pause)))
+		srrctl |= E1000_SRRCTL_DROP_EN;
+
+	E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl);
+
+	/* set filtering for VMDQ pools */
+	igb_set_vmolr(adapter, reg_idx & 0x7, true);
+
+	rxdctl |= IGB_RX_PTHRESH;
+	rxdctl |= IGB_RX_HTHRESH << 8;
+	rxdctl |= IGB_RX_WTHRESH << 16;
+
+	/* enable receive descriptor fetching */
+	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
+	E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl);
+}
+
+/**
+ * igb_configure_rx - Configure receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void igb_configure_rx(struct igb_adapter *adapter)
+{
+	int i;
+
+	/* set UTA to appropriate mode */
+	igb_set_uta(adapter);
+
+	igb_full_sync_mac_table(adapter);
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
+}
+
+/**
+ * igb_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void igb_free_tx_resources(struct igb_ring *tx_ring)
+{
+	igb_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * igb_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void igb_free_all_tx_resources(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igb_free_tx_resources(adapter->tx_ring[i]);
+}
+
+void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
+				    struct igb_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->skb) {
+		dev_kfree_skb_any(tx_buffer->skb);
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_single(ring->dev,
+			                 dma_unmap_addr(tx_buffer, dma),
+			                 dma_unmap_len(tx_buffer, len),
+			                 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buffer, len)) {
+		dma_unmap_page(ring->dev,
+		               dma_unmap_addr(tx_buffer, dma),
+		               dma_unmap_len(tx_buffer, len),
+		               DMA_TO_DEVICE);
+	}
+	tx_buffer->next_to_watch = NULL;
+	tx_buffer->skb = NULL;
+	dma_unmap_len_set(tx_buffer, len, 0);
+	/* buffer_info must be completely set up in the transmit path */
+}
+
+/**
+ * igb_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void igb_clean_tx_ring(struct igb_ring *tx_ring)
+{
+	struct igb_tx_buffer *buffer_info;
+	unsigned long size;
+	u16 i;
+
+	if (!tx_ring->tx_buffer_info)
+		return;
+	/* Free all the Tx ring sk_buffs */
+
+	for (i = 0; i < tx_ring->count; i++) {
+		buffer_info = &tx_ring->tx_buffer_info[i];
+		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
+	}
+
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
+	memset(tx_ring->tx_buffer_info, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ * igb_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igb_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+/**
+ * igb_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void igb_free_rx_resources(struct igb_ring *rx_ring)
+{
+	igb_clean_rx_ring(rx_ring);
+
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * igb_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void igb_free_all_rx_resources(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igb_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * igb_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+void igb_clean_rx_ring(struct igb_ring *rx_ring)
+{
+	unsigned long size;
+	u16 i;
+
+	if (!rx_ring->rx_buffer_info)
+		return;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	if (rx_ring->skb)
+		dev_kfree_skb(rx_ring->skb);
+	rx_ring->skb = NULL;
+
+#endif
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		if (buffer_info->dma) {
+			dma_unmap_single(rx_ring->dev,
+			                 buffer_info->dma,
+					 rx_ring->rx_buffer_len,
+					 DMA_FROM_DEVICE);
+			buffer_info->dma = 0;
+		}
+
+		if (buffer_info->skb) {
+			dev_kfree_skb(buffer_info->skb);
+			buffer_info->skb = NULL;
+		}
+#else
+		if (!buffer_info->page)
+			continue;
+
+		dma_unmap_page(rx_ring->dev,
+			       buffer_info->dma,
+			       PAGE_SIZE,
+			       DMA_FROM_DEVICE);
+		__free_page(buffer_info->page);
+
+		buffer_info->page = NULL;
+#endif
+	}
+
+	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
+	memset(rx_ring->rx_buffer_info, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * igb_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igb_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * igb_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int igb_set_mac(struct net_device *netdev, void *p)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	igb_del_mac_filter(adapter, hw->mac.addr,
+			   adapter->vfs_allocated_count);
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+	/* set the correct pool for the new PF MAC address in entry 0 */
+	return igb_add_mac_filter(adapter, hw->mac.addr,
+	                   adapter->vfs_allocated_count);
+}
+
+/**
+ * igb_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ *                0 on no addresses written
+ *                X on writing X addresses to MTA
+ **/
+int igb_write_mc_addr_list(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+	struct netdev_hw_addr *ha;
+#else
+	struct dev_mc_list *ha;
+#endif
+	u8  *mta_list;
+	int i, count;
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	int vm;
+#endif
+	count = netdev_mc_count(netdev);
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	for (vm = 1; vm < adapter->vmdq_pools; vm++) {
+		if (!adapter->vmdq_netdev[vm])
+			break;
+		if (!netif_running(adapter->vmdq_netdev[vm]))
+			continue;
+		count += netdev_mc_count(adapter->vmdq_netdev[vm]);
+	}
+#endif
+
+	if (!count) {
+		e1000_update_mc_addr_list(hw, NULL, 0);
+		return 0;
+	}
+	mta_list = kzalloc(count * 6, GFP_ATOMIC);
+	if (!mta_list)
+		return -ENOMEM;
+
+	/* The shared function expects a packed array of only addresses. */
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev)
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+#else
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN);
+#endif
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	for (vm = 1; vm < adapter->vmdq_pools; vm++) {
+		if (!adapter->vmdq_netdev[vm])
+			break;
+		if (!netif_running(adapter->vmdq_netdev[vm]) ||
+		    !netdev_mc_count(adapter->vmdq_netdev[vm]))
+			continue;
+		netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm])
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+			memcpy(mta_list + (i++ * ETH_ALEN),
+			       ha->addr, ETH_ALEN);
+#else
+			memcpy(mta_list + (i++ * ETH_ALEN),
+			       ha->dmi_addr, ETH_ALEN);
+#endif
+	}
+#endif
+	e1000_update_mc_addr_list(hw, mta_list, i);
+	kfree(mta_list);
+
+	return count;
+}
+
+void igb_rar_set(struct igb_adapter *adapter, u32 index)
+{
+	u32 rar_low, rar_high;
+	struct e1000_hw *hw = &adapter->hw;
+	u8 *addr = adapter->mac_table[index].addr;
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
+	          ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+
+	/* Indicate to hardware the Address is Valid. */
+	if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE)
+		rar_high |= E1000_RAH_AV;
+
+	if (hw->mac.type == e1000_82575)
+		rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue;
+	else
+		rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue;
+
+	E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
+	E1000_WRITE_FLUSH(hw);
+	E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
+	E1000_WRITE_FLUSH(hw);
+}
+
+void igb_full_sync_mac_table(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+	for (i = 0; i < hw->mac.rar_entry_count; i++) {
+			igb_rar_set(adapter, i);
+	}
+}
+
+void igb_sync_mac_table(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+	for (i = 0; i < hw->mac.rar_entry_count; i++) {
+		if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED)
+			igb_rar_set(adapter, i);
+		adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED);
+	}
+}
+
+int igb_available_rars(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i, count = 0;
+
+	for (i = 0; i < hw->mac.rar_entry_count; i++) {
+		if (adapter->mac_table[i].state == 0)
+			count++;
+	}
+	return count;
+}
+
+#ifdef HAVE_SET_RX_MODE
+/**
+ * igb_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ *                0 on no addresses written
+ *                X on writing X addresses to the RAR table
+ **/
+static int igb_write_uc_addr_list(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	unsigned int vfn = adapter->vfs_allocated_count;
+	int count = 0;
+
+	/* return ENOMEM indicating insufficient memory for addresses */
+	if (netdev_uc_count(netdev) > igb_available_rars(adapter))
+		return -ENOMEM;
+	if (!netdev_uc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+		struct netdev_hw_addr *ha;
+#else
+		struct dev_mc_list *ha;
+#endif
+		netdev_for_each_uc_addr(ha, netdev) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+			igb_del_mac_filter(adapter, ha->addr, vfn);
+			igb_add_mac_filter(adapter, ha->addr, vfn);
+#else
+			igb_del_mac_filter(adapter, ha->da_addr, vfn);
+			igb_add_mac_filter(adapter, ha->da_addr, vfn);
+#endif
+			count++;
+		}
+	}
+	return count;
+}
+
+#endif /* HAVE_SET_RX_MODE */
+/**
+ * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+static void igb_set_rx_mode(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned int vfn = adapter->vfs_allocated_count;
+	u32 rctl, vmolr = 0;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	rctl = E1000_READ_REG(hw, E1000_RCTL);
+
+	/* clear the effected bits */
+	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
+
+	if (netdev->flags & IFF_PROMISC) {
+		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
+		/* retain VLAN HW filtering if in VT mode */
+		if (adapter->vfs_allocated_count || adapter->vmdq_pools)
+			rctl |= E1000_RCTL_VFE;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI) {
+			rctl |= E1000_RCTL_MPE;
+			vmolr |= E1000_VMOLR_MPME;
+		} else {
+			/*
+			 * Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			count = igb_write_mc_addr_list(netdev);
+			if (count < 0) {
+				rctl |= E1000_RCTL_MPE;
+				vmolr |= E1000_VMOLR_MPME;
+			} else if (count) {
+				vmolr |= E1000_VMOLR_ROMPE;
+			}
+		}
+#ifdef HAVE_SET_RX_MODE
+		/*
+		 * Write addresses to available RAR registers, if there is not
+		 * sufficient space to store all the addresses then enable
+		 * unicast promiscuous mode
+		 */
+		count = igb_write_uc_addr_list(netdev);
+		if (count < 0) {
+			rctl |= E1000_RCTL_UPE;
+			vmolr |= E1000_VMOLR_ROPE;
+		}
+#endif /* HAVE_SET_RX_MODE */
+		rctl |= E1000_RCTL_VFE;
+	}
+	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+
+	/*
+	 * In order to support SR-IOV and eventually VMDq it is necessary to set
+	 * the VMOLR to enable the appropriate modes.  Without this workaround
+	 * we will have issues with VLAN tag stripping not being done for frames
+	 * that are only arriving because we are the default pool
+	 */
+	if (hw->mac.type < e1000_82576)
+		return;
+
+	vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) &
+	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
+	E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
+	igb_restore_vf_multicasts(adapter);
+}
+
+static void igb_check_wvbr(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 wvbr = 0;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+	case e1000_i350:
+		if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR)))
+			return;
+		break;
+	default:
+		break;
+	}
+
+	adapter->wvbr |= wvbr;
+}
+
+#define IGB_STAGGERED_QUEUE_OFFSET 8
+
+static void igb_spoof_check(struct igb_adapter *adapter)
+{
+	int j;
+
+	if (!adapter->wvbr)
+		return;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		for (j = 0; j < adapter->vfs_allocated_count; j++) {
+			if (adapter->wvbr & (1 << j) ||
+			    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
+				DPRINTK(DRV, WARNING,
+					"Spoof event(s) detected on VF %d\n", j);
+				adapter->wvbr &=
+					~((1 << j) |
+					  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
+			}
+		}
+		break;
+	case e1000_i350:
+		for (j = 0; j < adapter->vfs_allocated_count; j++) {
+			if (adapter->wvbr & (1 << j)) {
+				DPRINTK(DRV, WARNING,
+					"Spoof event(s) detected on VF %d\n", j);
+				adapter->wvbr &= ~(1 << j);
+			}
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/* Need to wait a few seconds after link up to get diagnostic information from
+ * the phy */
+static void igb_update_phy_info(unsigned long data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *) data;
+	e1000_get_phy_info(&adapter->hw);
+}
+
+/**
+ * igb_has_link - check shared code for link and determine up/down
+ * @adapter: pointer to driver private info
+ **/
+bool igb_has_link(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	bool link_active = FALSE;
+
+	/* get_link_status is set on LSC (link status) interrupt or
+	 * rx sequence error interrupt.  get_link_status will stay
+	 * false until the e1000_check_for_link establishes link
+	 * for copper adapters ONLY
+	 */
+	switch (hw->phy.media_type) {
+	case e1000_media_type_copper:
+		if (!hw->mac.get_link_status)
+			return true;
+	case e1000_media_type_internal_serdes:
+		e1000_check_for_link(hw);
+		link_active = !hw->mac.get_link_status;
+		break;
+	case e1000_media_type_unknown:
+	default:
+		break;
+	}
+
+	if (((hw->mac.type == e1000_i210) ||
+	     (hw->mac.type == e1000_i211)) &&
+	     (hw->phy.id == I210_I_PHY_ID)) {
+		if (!netif_carrier_ok(adapter->netdev)) {
+			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+		} else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
+			adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
+			adapter->link_check_timeout = jiffies;
+		}
+	}
+
+	return link_active;
+}
+
+/**
+ * igb_watchdog - Timer Call-back
+ * @data: pointer to adapter cast into an unsigned long
+ **/
+static void igb_watchdog(unsigned long data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igb_watchdog_task(struct work_struct *work)
+{
+	struct igb_adapter *adapter = container_of(work,
+	                                           struct igb_adapter,
+                                                   watchdog_task);
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 link;
+	int i;
+	u32 thstat, ctrl_ext;
+	u32 connsw;
+
+	link = igb_has_link(adapter);
+	/* Force link down if we have fiber to swap to */
+	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+		if (hw->phy.media_type == e1000_media_type_copper) {
+			connsw = E1000_READ_REG(hw, E1000_CONNSW);
+			if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
+				link = 0;
+		}
+	}
+
+	if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
+		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+		else
+			link = FALSE;
+	}
+
+	if (link) {
+		/* Perform a reset if the media type changed. */
+		if (hw->dev_spec._82575.media_changed) {
+			hw->dev_spec._82575.media_changed = false;
+			adapter->flags |= IGB_FLAG_MEDIA_RESET;
+			igb_reset(adapter);
+		}
+
+		/* Cancel scheduled suspend requests. */
+		pm_runtime_resume(netdev->dev.parent);
+
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+			e1000_get_speed_and_duplex(hw,
+			                           &adapter->link_speed,
+			                           &adapter->link_duplex);
+
+			ctrl = E1000_READ_REG(hw, E1000_CTRL);
+			/* Links status message must follow this format */
+			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
+				 "Flow Control: %s\n",
+			       netdev->name,
+			       adapter->link_speed,
+			       adapter->link_duplex == FULL_DUPLEX ?
+				 "Full Duplex" : "Half Duplex",
+			       ((ctrl & E1000_CTRL_TFCE) &&
+			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX":
+			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
+			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 14;
+				break;
+			case SPEED_100:
+				/* maybe add some timeout factor ? */
+				break;
+			default:
+				break;
+			}
+
+			netif_carrier_on(netdev);
+			netif_tx_wake_all_queues(netdev);
+
+			igb_ping_all_vfs(adapter);
+#ifdef IFLA_VF_MAX
+			igb_check_vf_rate_limit(adapter);
+#endif /* IFLA_VF_MAX */
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGB_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+			/* check for thermal sensor event on i350 */
+			if (hw->mac.type == e1000_i350) {
+				thstat = E1000_READ_REG(hw, E1000_THSTAT);
+				ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
+				if ((hw->phy.media_type ==
+					e1000_media_type_copper) &&
+					!(ctrl_ext &
+					E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+					if (thstat & E1000_THSTAT_PWR_DOWN) {
+						printk(KERN_ERR "igb: %s The "
+						"network adapter was stopped "
+						"because it overheated.\n",
+						netdev->name);
+					}
+					if (thstat & E1000_THSTAT_LINK_THROTTLE) {
+						printk(KERN_INFO
+							"igb: %s The network "
+							"adapter supported "
+							"link speed "
+							"was downshifted "
+							"because it "
+							"overheated.\n",
+							netdev->name);
+					}
+				}
+			}
+
+			/* Links status message must follow this format */
+			printk(KERN_INFO "igb: %s NIC Link is Down\n",
+			       netdev->name);
+			netif_carrier_off(netdev);
+			netif_tx_stop_all_queues(netdev);
+
+			igb_ping_all_vfs(adapter);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGB_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+			/* link is down, time to check for alternate media */
+			if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+				igb_check_swap_media(adapter);
+				if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+					schedule_work(&adapter->reset_task);
+					/* return immediately */
+					return;
+				}
+			}
+			pm_schedule_suspend(netdev->dev.parent,
+					    MSEC_PER_SEC * 5);
+
+		/* also check for alternate media here */
+		} else if (!netif_carrier_ok(netdev) &&
+			   (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
+			hw->mac.ops.power_up_serdes(hw);
+			igb_check_swap_media(adapter);
+			if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+				schedule_work(&adapter->reset_task);
+				/* return immediately */
+				return;
+			}
+		}
+	}
+
+	igb_update_stats(adapter);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igb_ring *tx_ring = adapter->tx_ring[i];
+		if (!netif_carrier_ok(netdev)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context). */
+			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
+				adapter->tx_timeout_count++;
+				schedule_work(&adapter->reset_task);
+				/* return immediately since reset is imminent */
+				return;
+			}
+		}
+
+		/* Force detection of hung controller every watchdog period */
+		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+	}
+
+	/* Cause software interrupt to ensure rx ring is cleaned */
+	if (adapter->msix_entries) {
+		u32 eics = 0;
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
+		E1000_WRITE_REG(hw, E1000_EICS, eics);
+	} else {
+		E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0);
+	}
+
+	igb_spoof_check(adapter);
+
+	/* Reset the timer */
+	if (!test_bit(__IGB_DOWN, &adapter->state)) {
+		if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies +  HZ));
+		else
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + 2 * HZ));
+	}
+}
+
+static void igb_dma_err_task(struct work_struct *work)
+{
+	struct igb_adapter *adapter = container_of(work,
+	                                           struct igb_adapter,
+                                                   dma_err_task);
+	int vf;
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 hgptc;
+	u32 ciaa, ciad;
+
+	hgptc = E1000_READ_REG(hw, E1000_HGPTC);
+	if (hgptc) /* If incrementing then no need for the check below */
+		goto dma_timer_reset;
+	/*
+	 * Check to see if a bad DMA write target from an errant or
+	 * malicious VF has caused a PCIe error.  If so then we can
+	 * issue a VFLR to the offending VF(s) and then resume without
+	 * requesting a full slot reset.
+	 */
+
+	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+		ciaa = (vf << 16) | 0x80000000;
+		/* 32 bit read so align, we really want status at offset 6 */
+		ciaa |= PCI_COMMAND;
+		E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+		ciad = E1000_READ_REG(hw, E1000_CIAD);
+		ciaa &= 0x7FFFFFFF;
+		/* disable debug mode asap after reading data */
+		E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+		/* Get the upper 16 bits which will be the PCI status reg */
+		ciad >>= 16;
+		if (ciad & (PCI_STATUS_REC_MASTER_ABORT |
+			    PCI_STATUS_REC_TARGET_ABORT |
+			    PCI_STATUS_SIG_SYSTEM_ERROR)) {
+			netdev_err(netdev, "VF %d suffered error\n", vf);
+			/* Issue VFLR */
+			ciaa = (vf << 16) | 0x80000000;
+			ciaa |= 0xA8;
+			E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+			ciad = 0x00008000;  /* VFLR */
+			E1000_WRITE_REG(hw, E1000_CIAD, ciad);
+			ciaa &= 0x7FFFFFFF;
+			E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
+		}
+	}
+dma_timer_reset:
+	/* Reset the timer */
+	if (!test_bit(__IGB_DOWN, &adapter->state))
+		mod_timer(&adapter->dma_err_timer,
+			  round_jiffies(jiffies + HZ / 10));
+}
+
+/**
+ * igb_dma_err_timer - Timer Call-back
+ * @data: pointer to adapter cast into an unsigned long
+ **/
+static void igb_dma_err_timer(unsigned long data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->dma_err_task);
+}
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+/**
+ * igb_update_ring_itr - update the dynamic ITR value based on packet size
+ *
+ *      Stores a new ITR value based on strictly on packet size.  This
+ *      algorithm is less sophisticated than that used in igb_update_itr,
+ *      due to the difficulty of synchronizing statistics across multiple
+ *      receive rings.  The divisors and thresholds used by this function
+ *      were determined based on theoretical maximum wire speed and testing
+ *      data, in order to minimize response time while increasing bulk
+ *      throughput.
+ *      This functionality is controlled by the InterruptThrottleRate module
+ *      parameter (see igb_param.c)
+ *      NOTE:  This function is called only when operating in a multiqueue
+ *             receive environment.
+ * @q_vector: pointer to q_vector
+ **/
+static void igb_update_ring_itr(struct igb_q_vector *q_vector)
+{
+	int new_val = q_vector->itr_val;
+	int avg_wire_size = 0;
+	struct igb_adapter *adapter = q_vector->adapter;
+	unsigned int packets;
+
+	/* For non-gigabit speeds, just fix the interrupt rate at 4000
+	 * ints/sec - ITR timer value of 120 ticks.
+	 */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		new_val = IGB_4K_ITR;
+		goto set_itr_val;
+	default:
+		break;
+	}
+
+	packets = q_vector->rx.total_packets;
+	if (packets)
+		avg_wire_size = q_vector->rx.total_bytes / packets;
+
+	packets = q_vector->tx.total_packets;
+	if (packets)
+		avg_wire_size = max_t(u32, avg_wire_size,
+		                      q_vector->tx.total_bytes / packets);
+
+	/* if avg_wire_size isn't set no work was done */
+	if (!avg_wire_size)
+		goto clear_counts;
+
+	/* Add 24 bytes to size to account for CRC, preamble, and gap */
+	avg_wire_size += 24;
+
+	/* Don't starve jumbo frames */
+	avg_wire_size = min(avg_wire_size, 3000);
+
+	/* Give a little boost to mid-size frames */
+	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
+		new_val = avg_wire_size / 3;
+	else
+		new_val = avg_wire_size / 2;
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (new_val < IGB_20K_ITR &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		new_val = IGB_20K_ITR;
+
+set_itr_val:
+	if (new_val != q_vector->itr_val) {
+		q_vector->itr_val = new_val;
+		q_vector->set_itr = 1;
+	}
+clear_counts:
+	q_vector->rx.total_bytes = 0;
+	q_vector->rx.total_packets = 0;
+	q_vector->tx.total_bytes = 0;
+	q_vector->tx.total_packets = 0;
+}
+
+/**
+ * igb_update_itr - update the dynamic ITR value based on statistics
+ *      Stores a new ITR value based on packets and byte
+ *      counts during the last interrupt.  The advantage of per interrupt
+ *      computation is faster updates and more accurate ITR for the current
+ *      traffic pattern.  Constants in this function were computed
+ *      based on theoretical maximum wire speed and thresholds were set based
+ *      on testing data as well as attempting to minimize response time
+ *      while increasing bulk throughput.
+ *      this functionality is controlled by the InterruptThrottleRate module
+ *      parameter (see igb_param.c)
+ *      NOTE:  These calculations are only valid when operating in a single-
+ *             queue environment.
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
+ **/
+static void igb_update_itr(struct igb_q_vector *q_vector,
+			   struct igb_ring_container *ring_container)
+{
+	unsigned int packets = ring_container->total_packets;
+	unsigned int bytes = ring_container->total_bytes;
+	u8 itrval = ring_container->itr;
+
+	/* no packets, exit with status unchanged */
+	if (packets == 0)
+		return;
+
+	switch (itrval) {
+	case lowest_latency:
+		/* handle TSO and jumbo frames */
+		if (bytes/packets > 8000)
+			itrval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			itrval = low_latency;
+		break;
+	case low_latency:  /* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* this if handles the TSO accounting */
+			if (bytes/packets > 8000) {
+				itrval = bulk_latency;
+			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
+				itrval = bulk_latency;
+			} else if ((packets > 35)) {
+				itrval = lowest_latency;
+			}
+		} else if (bytes/packets > 2000) {
+			itrval = bulk_latency;
+		} else if (packets <= 2 && bytes < 512) {
+			itrval = lowest_latency;
+		}
+		break;
+	case bulk_latency: /* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				itrval = low_latency;
+		} else if (bytes < 1500) {
+			itrval = low_latency;
+		}
+		break;
+	}
+
+	/* clear work counters since we have the values we need */
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+
+	/* write updated itr to ring container */
+	ring_container->itr = itrval;
+}
+
+static void igb_set_itr(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	u32 new_itr = q_vector->itr_val;
+	u8 current_itr = 0;
+
+	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		current_itr = 0;
+		new_itr = IGB_4K_ITR;
+		goto set_itr_now;
+	default:
+		break;
+	}
+
+	igb_update_itr(q_vector, &q_vector->tx);
+	igb_update_itr(q_vector, &q_vector->rx);
+
+	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (current_itr == lowest_latency &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		current_itr = low_latency;
+
+	switch (current_itr) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
+		break;
+	case low_latency:
+		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
+		break;
+	case bulk_latency:
+		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
+		break;
+	default:
+		break;
+	}
+
+set_itr_now:
+	if (new_itr != q_vector->itr_val) {
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing */
+		new_itr = new_itr > q_vector->itr_val ?
+		             max((new_itr * q_vector->itr_val) /
+		                 (new_itr + (q_vector->itr_val >> 2)),
+				 new_itr) :
+			     new_itr;
+		/* Don't write the value here; it resets the adapter's
+		 * internal timer, and causes us to delay far longer than
+		 * we should between interrupts.  Instead, we write the ITR
+		 * value at the beginning of the next interrupt so the timing
+		 * ends up being correct.
+		 */
+		q_vector->itr_val = new_itr;
+		q_vector->set_itr = 1;
+	}
+}
+
+void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
+		     u32 type_tucmd, u32 mss_l4len_idx)
+{
+	struct e1000_adv_tx_context_desc *context_desc;
+	u16 i = tx_ring->next_to_use;
+
+	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	/* For 82575, context index must be unique per ring. */
+	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		mss_l4len_idx |= tx_ring->reg_idx << 4;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->seqnum_seed	= 0;
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+}
+
+static int igb_tso(struct igb_ring *tx_ring,
+		   struct igb_tx_buffer *first,
+		   u8 *hdr_len)
+{
+#ifdef NETIF_F_TSO
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens, type_tucmd;
+	u32 mss_l4len_idx, l4len;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+#endif /* NETIF_F_TSO */
+		return 0;
+#ifdef NETIF_F_TSO
+
+	if (skb_header_cloned(skb)) {
+		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		if (err)
+			return err;
+	}
+
+	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+
+	if (first->protocol == __constant_htons(ETH_P_IP)) {
+		struct iphdr *iph = ip_hdr(skb);
+		iph->tot_len = 0;
+		iph->check = 0;
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP,
+							 0);
+		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+		first->tx_flags |= IGB_TX_FLAGS_TSO |
+				   IGB_TX_FLAGS_CSUM |
+				   IGB_TX_FLAGS_IPV4;
+#ifdef NETIF_F_TSO6
+	} else if (skb_is_gso_v6(skb)) {
+		ipv6_hdr(skb)->payload_len = 0;
+		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						       &ipv6_hdr(skb)->daddr,
+						       0, IPPROTO_TCP, 0);
+		first->tx_flags |= IGB_TX_FLAGS_TSO |
+				   IGB_TX_FLAGS_CSUM;
+#endif
+	}
+
+	/* compute header lengths */
+	l4len = tcp_hdrlen(skb);
+	*hdr_len = skb_transport_offset(skb) + l4len;
+
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* MSS L4LEN IDX */
+	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
+	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
+
+	/* VLAN MACLEN IPLEN */
+	vlan_macip_lens = skb_network_header_len(skb);
+	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+
+	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+
+	return 1;
+#endif  /* NETIF_F_TSO */
+}
+
+static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens = 0;
+	u32 mss_l4len_idx = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
+			return;
+	} else {
+		u8 nexthdr = 0;
+		switch (first->protocol) {
+		case __constant_htons(ETH_P_IP):
+			vlan_macip_lens |= skb_network_header_len(skb);
+			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+			nexthdr = ip_hdr(skb)->protocol;
+			break;
+#ifdef NETIF_F_IPV6_CSUM
+		case __constant_htons(ETH_P_IPV6):
+			vlan_macip_lens |= skb_network_header_len(skb);
+			nexthdr = ipv6_hdr(skb)->nexthdr;
+			break;
+#endif
+		default:
+			if (unlikely(net_ratelimit())) {
+				dev_warn(tx_ring->dev,
+				 "partial checksum but proto=%x!\n",
+				 first->protocol);
+			}
+			break;
+		}
+
+		switch (nexthdr) {
+		case IPPROTO_TCP:
+			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+			mss_l4len_idx = tcp_hdrlen(skb) <<
+					E1000_ADVTXD_L4LEN_SHIFT;
+			break;
+#ifdef HAVE_SCTP
+		case IPPROTO_SCTP:
+			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
+			mss_l4len_idx = sizeof(struct sctphdr) <<
+					E1000_ADVTXD_L4LEN_SHIFT;
+			break;
+#endif
+		case IPPROTO_UDP:
+			mss_l4len_idx = sizeof(struct udphdr) <<
+					E1000_ADVTXD_L4LEN_SHIFT;
+			break;
+		default:
+			if (unlikely(net_ratelimit())) {
+				dev_warn(tx_ring->dev,
+				 "partial checksum but l4 proto=%x!\n",
+				 nexthdr);
+			}
+			break;
+		}
+
+		/* update TX checksum flag */
+		first->tx_flags |= IGB_TX_FLAGS_CSUM;
+	}
+
+	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+
+	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+}
+
+#define IGB_SET_FLAG(_input, _flag, _result) \
+	((_flag <= _result) ? \
+	 ((u32)(_input & _flag) * (_result / _flag)) : \
+	 ((u32)(_input & _flag) / (_flag / _result)))
+
+static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
+		       E1000_ADVTXD_DCMD_DEXT |
+		       E1000_ADVTXD_DCMD_IFCS;
+
+	/* set HW vlan bit if vlan is present */
+	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
+				 (E1000_ADVTXD_DCMD_VLE));
+
+	/* set segmentation bits for TSO */
+	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
+				 (E1000_ADVTXD_DCMD_TSE));
+
+	/* set timestamp bit if present */
+	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
+				 (E1000_ADVTXD_MAC_TSTAMP));
+
+	return cmd_type;
+}
+
+static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
+				 union e1000_adv_tx_desc *tx_desc,
+				 u32 tx_flags, unsigned int paylen)
+{
+	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+
+	/* 82575 requires a unique index per ring */
+	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		olinfo_status |= tx_ring->reg_idx << 4;
+
+	/* insert L4 checksum */
+	olinfo_status |= IGB_SET_FLAG(tx_flags,
+				      IGB_TX_FLAGS_CSUM,
+				      (E1000_TXD_POPTS_TXSM << 8));
+
+	/* insert IPv4 checksum */
+	olinfo_status |= IGB_SET_FLAG(tx_flags,
+				      IGB_TX_FLAGS_IPV4,
+				      (E1000_TXD_POPTS_IXSM << 8));
+
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static void igb_tx_map(struct igb_ring *tx_ring,
+		       struct igb_tx_buffer *first,
+		       const u8 hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct igb_tx_buffer *tx_buffer;
+	union e1000_adv_tx_desc *tx_desc;
+	struct skb_frag_struct *frag;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u32 tx_flags = first->tx_flags;
+	u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
+	u16 i = tx_ring->next_to_use;
+
+	tx_desc = IGB_TX_DESC(tx_ring, i);
+
+	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += IGB_MAX_DATA_PER_TXD;
+			size -= IGB_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IGB_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+				       size, DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= size | IGB_TXD_DCMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+	/* set the timestamp */
+	first->time_stamp = jiffies;
+
+	/*
+	 * Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	writel(i, tx_ring->tail);
+
+	/* we need this if more than one processor can write to our tail
+	 * at a time, it syncronizes IO on IA64/Altix systems */
+	mmiowb();
+
+	return;
+
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
+{
+	struct net_device *netdev = netdev_ring(tx_ring);
+
+	if (netif_is_multiqueue(netdev))
+		netif_stop_subqueue(netdev, ring_queue_index(tx_ring));
+	else
+		netif_stop_queue(netdev);
+
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it. */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available. */
+	if (igb_desc_unused(tx_ring) < size)
+		return -EBUSY;
+
+	/* A reprieve! */
+	if (netif_is_multiqueue(netdev))
+		netif_wake_subqueue(netdev, ring_queue_index(tx_ring));
+	else
+		netif_wake_queue(netdev);
+
+	tx_ring->tx_stats.restart_queue++;
+
+	return 0;
+}
+
+static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
+{
+	if (igb_desc_unused(tx_ring) >= size)
+		return 0;
+	return __igb_maybe_stop_tx(tx_ring, size);
+}
+
+netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
+				struct igb_ring *tx_ring)
+{
+	struct igb_tx_buffer *first;
+	int tso;
+	u32 tx_flags = 0;
+#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD
+	unsigned short f;
+#endif
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	__be16 protocol = vlan_get_protocol(skb);
+	u8 hdr_len = 0;
+
+	/*
+	 * need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+#else
+	count += skb_shinfo(skb)->nr_frags;
+#endif
+	if (igb_maybe_stop_tx(tx_ring, count + 3)) {
+		/* this is a hard error */
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	skb_tx_timestamp(skb);
+
+#ifdef HAVE_PTP_1588_CLOCK
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
+		if (!adapter->ptp_tx_skb) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			tx_flags |= IGB_TX_FLAGS_TSTAMP;
+
+			adapter->ptp_tx_skb = skb_get(skb);
+			adapter->ptp_tx_start = jiffies;
+			if (adapter->hw.mac.type == e1000_82576)
+				schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	if (vlan_tx_tag_present(skb)) {
+		tx_flags |= IGB_TX_FLAGS_VLAN;
+		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
+	}
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = protocol;
+
+	tso = igb_tso(tx_ring, first, &hdr_len);
+	if (tso < 0)
+		goto out_drop;
+	else if (!tso)
+		igb_tx_csum(tx_ring, first);
+
+	igb_tx_map(tx_ring, first, hdr_len);
+
+#ifndef HAVE_TRANS_START_IN_QUEUE
+	netdev_ring(tx_ring)->trans_start = jiffies;
+
+#endif
+	/* Make sure there is space in the ring for the next send. */
+	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	igb_unmap_and_free_tx_resource(tx_ring, first);
+
+	return NETDEV_TX_OK;
+}
+
+#ifdef HAVE_TX_MQ
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+                                                    struct sk_buff *skb)
+{
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= adapter->num_tx_queues)
+		r_idx = r_idx % adapter->num_tx_queues;
+
+	return adapter->tx_ring[r_idx];
+}
+#else
+#define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0]
+#endif
+
+static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
+                                  struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (test_bit(__IGB_DOWN, &adapter->state)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/*
+	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb->len < 17) {
+		if (skb_padto(skb, 17))
+			return NETDEV_TX_OK;
+		skb->len = 17;
+	}
+
+	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
+}
+
+/**
+ * igb_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ **/
+static void igb_tx_timeout(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Do the reset outside of interrupt context */
+	adapter->tx_timeout_count++;
+
+	if (hw->mac.type >= e1000_82580)
+		hw->dev_spec._82575.global_device_reset = true;
+
+	schedule_work(&adapter->reset_task);
+	E1000_WRITE_REG(hw, E1000_EICS,
+			(adapter->eims_enable_mask & ~adapter->eims_other));
+}
+
+static void igb_reset_task(struct work_struct *work)
+{
+	struct igb_adapter *adapter;
+	adapter = container_of(work, struct igb_adapter, reset_task);
+
+	igb_reinit_locked(adapter);
+}
+
+/**
+ * igb_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are updated here and also from the timer callback.
+ **/
+static struct net_device_stats *igb_get_stats(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (!test_bit(__IGB_RESETTING, &adapter->state))
+		igb_update_stats(adapter);
+
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+	/* only return the current stats */
+	return &netdev->stats;
+#else
+	/* only return the current stats */
+	return &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+}
+
+/**
+ * igb_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int igb_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+
+	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
+		dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n");
+		return -EINVAL;
+	}
+
+#define MAX_STD_JUMBO_FRAME_SIZE 9238
+	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
+		dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n");
+		return -EINVAL;
+	}
+
+	/* adjust max frame to be at least the size of a standard frame */
+	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	/* igb_down has a dependency on max_frame_size */
+	adapter->max_frame_size = max_frame;
+
+	if (netif_running(netdev))
+		igb_down(adapter);
+
+	dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n",
+	        netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+	hw->dev_spec._82575.mtu = new_mtu;
+
+	if (netif_running(netdev))
+		igb_up(adapter);
+	else
+		igb_reset(adapter);
+
+	clear_bit(__IGB_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+/**
+ * igb_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ **/
+
+void igb_update_stats(struct igb_adapter *adapter)
+{
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+	struct net_device_stats *net_stats = &adapter->netdev->stats;
+#else
+	struct net_device_stats *net_stats = &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+	struct e1000_hw *hw = &adapter->hw;
+#ifdef HAVE_PCI_ERS
+	struct pci_dev *pdev = adapter->pdev;
+#endif
+	u32 reg, mpc;
+	u16 phy_tmp;
+	int i;
+	u64 bytes, packets;
+#ifndef IGB_NO_LRO
+	u32 flushed = 0, coal = 0;
+	struct igb_q_vector *q_vector;
+#endif
+
+#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
+
+	/*
+	 * Prevent stats update while adapter is being reset, or if the pci
+	 * connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+#ifdef HAVE_PCI_ERS
+	if (pci_channel_offline(pdev))
+		return;
+
+#endif
+#ifndef IGB_NO_LRO
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		q_vector = adapter->q_vector[i];
+		if (!q_vector)
+			continue;
+		flushed += q_vector->lrolist.stats.flushed;
+		coal += q_vector->lrolist.stats.coal;
+	}
+	adapter->lro_stats.flushed = flushed;
+	adapter->lro_stats.coal = coal;
+
+#endif
+	bytes = 0;
+	packets = 0;
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF;
+		struct igb_ring *ring = adapter->rx_ring[i];
+		ring->rx_stats.drops += rqdpc_tmp;
+		net_stats->rx_fifo_errors += rqdpc_tmp;
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+		if (!ring->vmdq_netdev) {
+			bytes += ring->rx_stats.bytes;
+			packets += ring->rx_stats.packets;
+		}
+#else
+		bytes += ring->rx_stats.bytes;
+		packets += ring->rx_stats.packets;
+#endif
+	}
+
+	net_stats->rx_bytes = bytes;
+	net_stats->rx_packets = packets;
+
+	bytes = 0;
+	packets = 0;
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igb_ring *ring = adapter->tx_ring[i];
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+		if (!ring->vmdq_netdev) {
+			bytes += ring->tx_stats.bytes;
+			packets += ring->tx_stats.packets;
+		}
+#else
+		bytes += ring->tx_stats.bytes;
+		packets += ring->tx_stats.packets;
+#endif
+	}
+	net_stats->tx_bytes = bytes;
+	net_stats->tx_packets = packets;
+
+	/* read stats registers */
+	adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
+	adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC);
+	adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL);
+	E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */
+	adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC);
+	adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC);
+	adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC);
+
+	adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64);
+	adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127);
+	adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255);
+	adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511);
+	adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
+	adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
+	adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
+	adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC);
+
+	mpc = E1000_READ_REG(hw, E1000_MPC);
+	adapter->stats.mpc += mpc;
+	net_stats->rx_fifo_errors += mpc;
+	adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC);
+	adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL);
+	adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC);
+	adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL);
+	adapter->stats.dc += E1000_READ_REG(hw, E1000_DC);
+	adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC);
+	adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
+	adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC);
+	adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
+	adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
+	adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC);
+	adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC);
+	adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL);
+	E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */
+	adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC);
+	adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC);
+	adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC);
+	adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC);
+	adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH);
+	adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH);
+	adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR);
+
+	adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64);
+	adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127);
+	adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255);
+	adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511);
+	adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
+	adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
+
+	adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC);
+	adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC);
+
+	adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT);
+	adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC);
+
+	adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
+	/* read internal phy sepecific stats */
+	reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
+	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
+		adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
+
+		/* this stat has invalid values on i210/i211 */
+		if ((hw->mac.type != e1000_i210) &&
+		    (hw->mac.type != e1000_i211))
+			adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS);
+	}
+	adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC);
+	adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
+
+	adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC);
+	adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
+	adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
+	adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
+	adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
+	adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
+	adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
+	adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
+	adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
+
+	/* Fill out the OS statistics structure */
+	net_stats->multicast = adapter->stats.mprc;
+	net_stats->collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC */
+	net_stats->rx_errors = adapter->stats.rxerrc +
+		adapter->stats.crcerrs + adapter->stats.algnerrc +
+		adapter->stats.ruc + adapter->stats.roc +
+		adapter->stats.cexterr;
+	net_stats->rx_length_errors = adapter->stats.ruc +
+				      adapter->stats.roc;
+	net_stats->rx_crc_errors = adapter->stats.crcerrs;
+	net_stats->rx_frame_errors = adapter->stats.algnerrc;
+	net_stats->rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	net_stats->tx_errors = adapter->stats.ecol +
+			       adapter->stats.latecol;
+	net_stats->tx_aborted_errors = adapter->stats.ecol;
+	net_stats->tx_window_errors = adapter->stats.latecol;
+	net_stats->tx_carrier_errors = adapter->stats.tncrs;
+
+	/* Tx Dropped needs to be maintained elsewhere */
+
+	/* Phy Stats */
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		if ((adapter->link_speed == SPEED_1000) &&
+		   (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
+			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
+			adapter->phy_stats.idle_errors += phy_tmp;
+		}
+	}
+
+	/* Management Stats */
+	adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
+	adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
+	if (hw->mac.type > e1000_82580) {
+		adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC);
+		adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC);
+		adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC);
+		adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC);
+	}
+}
+
+static irqreturn_t igb_msix_other(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = E1000_READ_REG(hw, E1000_ICR);
+	/* reading ICR causes bit 31 of EICR to be cleared */
+
+	if (icr & E1000_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & E1000_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+		/* The DMA Out of Sync is also indication of a spoof event
+		 * in IOV mode. Check the Wrong VM Behavior register to
+		 * see if it is really a spoof event. */
+		igb_check_wvbr(adapter);
+	}
+
+	/* Check for a mailbox event */
+	if (icr & E1000_ICR_VMMB)
+		igb_msg_task(adapter);
+
+	if (icr & E1000_ICR_LSC) {
+		hw->mac.get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGB_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+#ifdef HAVE_PTP_1588_CLOCK
+	if (icr & E1000_ICR_TS) {
+		u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
+
+		if (tsicr & E1000_TSICR_TXTS) {
+			/* acknowledge the interrupt */
+			E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
+			/* retrieve hardware timestamp */
+			schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	/* Check for MDD event */
+	if (icr & E1000_ICR_MDDET)
+		igb_process_mdd_event(adapter);
+
+	E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other);
+
+	return IRQ_HANDLED;
+}
+
+static void igb_write_itr(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	u32 itr_val = q_vector->itr_val & 0x7FFC;
+
+	if (!q_vector->set_itr)
+		return;
+
+	if (!itr_val)
+		itr_val = 0x4;
+
+	if (adapter->hw.mac.type == e1000_82575)
+		itr_val |= itr_val << 16;
+	else
+		itr_val |= E1000_EITR_CNT_IGNR;
+
+	writel(itr_val, q_vector->itr_register);
+	q_vector->set_itr = 0;
+}
+
+static irqreturn_t igb_msix_ring(int irq, void *data)
+{
+	struct igb_q_vector *q_vector = data;
+
+	/* Write the ITR value calculated from the previous interrupt. */
+	igb_write_itr(q_vector);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef IGB_DCA
+static void igb_update_tx_dca(struct igb_adapter *adapter,
+			      struct igb_ring *tx_ring,
+			      int cpu)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);
+
+	if (hw->mac.type != e1000_82575)
+		txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT_82576;
+
+	/*
+	 * We can enable relaxed ordering for reads, but not writes when
+	 * DCA is enabled.  This is due to a known issue in some chipsets
+	 * which will cause the DCA tag to be cleared.
+	 */
+	txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
+		  E1000_DCA_TXCTRL_DATA_RRO_EN |
+		  E1000_DCA_TXCTRL_DESC_DCA_EN;
+
+	E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
+}
+
+static void igb_update_rx_dca(struct igb_adapter *adapter,
+			      struct igb_ring *rx_ring,
+			      int cpu)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);
+
+	if (hw->mac.type != e1000_82575)
+		rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT_82576;
+
+	/*
+	 * We can enable relaxed ordering for reads, but not writes when
+	 * DCA is enabled.  This is due to a known issue in some chipsets
+	 * which will cause the DCA tag to be cleared.
+	 */
+	rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
+		  E1000_DCA_RXCTRL_DESC_DCA_EN;
+
+	E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
+}
+
+static void igb_update_dca(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	int cpu = get_cpu();
+
+	if (q_vector->cpu == cpu)
+		goto out_no_update;
+
+	if (q_vector->tx.ring)
+		igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);
+
+	if (q_vector->rx.ring)
+		igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);
+
+	q_vector->cpu = cpu;
+out_no_update:
+	put_cpu();
+}
+
+static void igb_setup_dca(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
+		return;
+
+	/* Always use CB2 mode, difference is masked in the CB driver. */
+	E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		adapter->q_vector[i]->cpu = -1;
+		igb_update_dca(adapter->q_vector[i]);
+	}
+}
+
+static int __igb_notify_dca(struct device *dev, void *data)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long event = *(unsigned long *)data;
+
+	switch (event) {
+	case DCA_PROVIDER_ADD:
+		/* if already enabled, don't do it again */
+		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
+			break;
+		if (dca_add_requester(dev) == E1000_SUCCESS) {
+			adapter->flags |= IGB_FLAG_DCA_ENABLED;
+			dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
+			igb_setup_dca(adapter);
+			break;
+		}
+		/* Fall Through since DCA is disabled. */
+	case DCA_PROVIDER_REMOVE:
+		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
+			/* without this a class_device is left
+			 * hanging around in the sysfs model */
+			dca_remove_requester(dev);
+			dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
+			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
+			E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
+		}
+		break;
+	}
+
+	return E1000_SUCCESS;
+}
+
+static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
+                          void *p)
+{
+	int ret_val;
+
+	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
+	                                 __igb_notify_dca);
+
+	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
+}
+#endif /* IGB_DCA */
+
+static int igb_vf_configure(struct igb_adapter *adapter, int vf)
+{
+	unsigned char mac_addr[ETH_ALEN];
+
+	random_ether_addr(mac_addr);
+	igb_set_vf_mac(adapter, vf, mac_addr);
+
+#ifdef IFLA_VF_MAX
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+	/* By default spoof check is enabled for all VFs */
+	adapter->vf_data[vf].spoofchk_enabled = true;
+#endif
+#endif
+
+	return true;
+}
+
+static void igb_ping_all_vfs(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ping;
+	int i;
+
+	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
+		ping = E1000_PF_CONTROL_MSG;
+		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
+			ping |= E1000_VT_MSGTYPE_CTS;
+		e1000_write_mbx(hw, &ping, 1, i);
+	}
+}
+
+/**
+ *  igb_mta_set_ - Set multicast filter table address
+ *  @adapter: pointer to the adapter structure
+ *  @hash_value: determines the MTA register and bit to set
+ *
+ *  The multicast table address is a register array of 32-bit registers.
+ *  The hash_value is used to determine what register the bit is in, the
+ *  current value is read, the new bit is OR'd in and the new value is
+ *  written back into the register.
+ **/
+void igb_mta_set(struct igb_adapter *adapter, u32 hash_value)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 hash_bit, hash_reg, mta;
+
+	/*
+	 * The MTA is a register array of 32-bit registers. It is
+	 * treated like an array of (32*mta_reg_count) bits.  We want to
+	 * set bit BitArray[hash_value]. So we figure out what register
+	 * the bit is in, read it, OR in the new bit, then write
+	 * back the new value.  The (hw->mac.mta_reg_count - 1) serves as a
+	 * mask to bits 31:5 of the hash value which gives us the
+	 * register we're modifying.  The hash bit within that register
+	 * is determined by the lower 5 bits of the hash value.
+	 */
+	hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+	hash_bit = hash_value & 0x1F;
+
+	mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg);
+
+	mta |= (1 << hash_bit);
+
+	E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta);
+	E1000_WRITE_FLUSH(hw);
+}
+
+static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf));
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+
+	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
+	                    IGB_VF_FLAG_MULTI_PROMISC);
+	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
+
+#ifdef IGB_ENABLE_VF_PROMISC
+	if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) {
+		vmolr |= E1000_VMOLR_ROPE;
+		vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC;
+		*msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST;
+	}
+#endif
+	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
+		vmolr |= E1000_VMOLR_MPME;
+		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
+		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
+	} else {
+		/*
+		 * if we have hashes and we are clearing a multicast promisc
+		 * flag we need to write the hashes to the MTA as this step
+		 * was previously skipped
+		 */
+		if (vf_data->num_vf_mc_hashes > 30) {
+			vmolr |= E1000_VMOLR_MPME;
+		} else if (vf_data->num_vf_mc_hashes) {
+			int j;
+			vmolr |= E1000_VMOLR_ROMPE;
+			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+				igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
+		}
+	}
+
+	E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr);
+
+	/* there are flags left unprocessed, likely not supported */
+	if (*msgbuf & E1000_VT_MSGINFO_MASK)
+		return -EINVAL;
+
+	return 0;
+
+}
+
+static int igb_set_vf_multicasts(struct igb_adapter *adapter,
+				  u32 *msgbuf, u32 vf)
+{
+	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+	u16 *hash_list = (u16 *)&msgbuf[1];
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	int i;
+
+	/* salt away the number of multicast addresses assigned
+	 * to this VF for later use to restore when the PF multi cast
+	 * list changes
+	 */
+	vf_data->num_vf_mc_hashes = n;
+
+	/* only up to 30 hash values supported */
+	if (n > 30)
+		n = 30;
+
+	/* store the hashes for later use */
+	for (i = 0; i < n; i++)
+		vf_data->vf_mc_hashes[i] = hash_list[i];
+
+	/* Flush and reset the mta with the new values */
+	igb_set_rx_mode(adapter->netdev);
+
+	return 0;
+}
+
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct vf_data_storage *vf_data;
+	int i, j;
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
+		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
+
+		vf_data = &adapter->vf_data[i];
+
+		if ((vf_data->num_vf_mc_hashes > 30) ||
+		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
+			vmolr |= E1000_VMOLR_MPME;
+		} else if (vf_data->num_vf_mc_hashes) {
+			vmolr |= E1000_VMOLR_ROMPE;
+			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+				igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
+		}
+		E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
+	}
+}
+
+static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 pool_mask, reg, vid;
+	u16 vlan_default;
+	int i;
+
+	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+
+	/* Find the vlan filter for this id */
+	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+		reg = E1000_READ_REG(hw, E1000_VLVF(i));
+
+		/* remove the vf from the pool */
+		reg &= ~pool_mask;
+
+		/* if pool is empty then remove entry from vfta */
+		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
+		    (reg & E1000_VLVF_VLANID_ENABLE)) {
+			reg = 0;
+			vid = reg & E1000_VLVF_VLANID_MASK;
+			igb_vfta_set(adapter, vid, FALSE);
+		}
+
+		E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
+	}
+
+	adapter->vf_data[vf].vlans_enabled = 0;
+
+	vlan_default = adapter->vf_data[vf].default_vf_vlan_id;
+	if (vlan_default)
+		igb_vlvf_set(adapter, vlan_default, true, vf);
+}
+
+s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg, i;
+
+	/* The vlvf table only exists on 82576 hardware and newer */
+	if (hw->mac.type < e1000_82576)
+		return -1;
+
+	/* we only need to do this if VMDq is enabled */
+	if (!adapter->vmdq_pools)
+		return -1;
+
+	/* Find the vlan filter for this id */
+	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+		reg = E1000_READ_REG(hw, E1000_VLVF(i));
+		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
+		    vid == (reg & E1000_VLVF_VLANID_MASK))
+			break;
+	}
+
+	if (add) {
+		if (i == E1000_VLVF_ARRAY_SIZE) {
+			/* Did not find a matching VLAN ID entry that was
+			 * enabled.  Search for a free filter entry, i.e.
+			 * one without the enable bit set
+			 */
+			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+				reg = E1000_READ_REG(hw, E1000_VLVF(i));
+				if (!(reg & E1000_VLVF_VLANID_ENABLE))
+					break;
+			}
+		}
+		if (i < E1000_VLVF_ARRAY_SIZE) {
+			/* Found an enabled/available entry */
+			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
+
+			/* if !enabled we need to set this up in vfta */
+			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
+				/* add VID to filter table */
+				igb_vfta_set(adapter, vid, TRUE);
+				reg |= E1000_VLVF_VLANID_ENABLE;
+			}
+			reg &= ~E1000_VLVF_VLANID_MASK;
+			reg |= vid;
+			E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
+
+			/* do not modify RLPML for PF devices */
+			if (vf >= adapter->vfs_allocated_count)
+				return E1000_SUCCESS;
+
+			if (!adapter->vf_data[vf].vlans_enabled) {
+				u32 size;
+				reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
+				size = reg & E1000_VMOLR_RLPML_MASK;
+				size += 4;
+				reg &= ~E1000_VMOLR_RLPML_MASK;
+				reg |= size;
+				E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
+			}
+
+			adapter->vf_data[vf].vlans_enabled++;
+		}
+	} else {
+		if (i < E1000_VLVF_ARRAY_SIZE) {
+			/* remove vf from the pool */
+			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
+			/* if pool is empty then remove entry from vfta */
+			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
+				reg = 0;
+				igb_vfta_set(adapter, vid, FALSE);
+			}
+			E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
+
+			/* do not modify RLPML for PF devices */
+			if (vf >= adapter->vfs_allocated_count)
+				return E1000_SUCCESS;
+
+			adapter->vf_data[vf].vlans_enabled--;
+			if (!adapter->vf_data[vf].vlans_enabled) {
+				u32 size;
+				reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
+				size = reg & E1000_VMOLR_RLPML_MASK;
+				size -= 4;
+				reg &= ~E1000_VMOLR_RLPML_MASK;
+				reg |= size;
+				E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
+			}
+		}
+	}
+	return E1000_SUCCESS;
+}
+
+#ifdef IFLA_VF_MAX
+static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (vid)
+		E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
+	else
+		E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0);
+}
+
+static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+			       int vf, u16 vlan, u8 qos)
+{
+	int err = 0;
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	/* VLAN IDs accepted range 0-4094 */
+	if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
+		return -EINVAL;
+	if (vlan || qos) {
+		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
+		if (err)
+			goto out;
+		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
+		igb_set_vmolr(adapter, vf, !vlan);
+		adapter->vf_data[vf].pf_vlan = vlan;
+		adapter->vf_data[vf].pf_qos = qos;
+		igb_set_vf_vlan_strip(adapter, vf, true);
+		dev_info(&adapter->pdev->dev,
+			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
+		if (test_bit(__IGB_DOWN, &adapter->state)) {
+			dev_warn(&adapter->pdev->dev,
+				 "The VF VLAN has been set,"
+				 " but the PF device is not up.\n");
+			dev_warn(&adapter->pdev->dev,
+				 "Bring the PF device up before"
+				 " attempting to use the VF device.\n");
+		}
+	} else {
+		if (adapter->vf_data[vf].pf_vlan)
+			dev_info(&adapter->pdev->dev,
+				 "Clearing VLAN on VF %d\n", vf);
+		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
+				   false, vf);
+		igb_set_vmvir(adapter, vlan, vf);
+		igb_set_vmolr(adapter, vf, true);
+		igb_set_vf_vlan_strip(adapter, vf, false);
+		adapter->vf_data[vf].pf_vlan = 0;
+		adapter->vf_data[vf].pf_qos = 0;
+       }
+out:
+       return err;
+}
+
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
+				bool setting)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 dtxswc, reg_offset;
+
+	if (!adapter->vfs_allocated_count)
+		return -EOPNOTSUPP;
+
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+
+	reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC;
+	dtxswc = E1000_READ_REG(hw, reg_offset);
+	if (setting)
+		dtxswc |= ((1 << vf) |
+			   (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
+	else
+		dtxswc &= ~((1 << vf) |
+			    (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT)));
+	E1000_WRITE_REG(hw, reg_offset, dtxswc);
+
+	adapter->vf_data[vf].spoofchk_enabled = setting;
+	return E1000_SUCCESS;
+}
+#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */
+#endif /* IFLA_VF_MAX */
+
+static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+	u32 reg;
+
+	/* Find the vlan filter for this id */
+	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
+		reg = E1000_READ_REG(hw, E1000_VLVF(i));
+		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
+		    vid == (reg & E1000_VLVF_VLANID_MASK))
+			break;
+	}
+
+	if (i >= E1000_VLVF_ARRAY_SIZE)
+		i = -1;
+
+	return i;
+}
+
+static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
+	int err = 0;
+
+	if (vid)
+		igb_set_vf_vlan_strip(adapter, vf, true);
+	else
+		igb_set_vf_vlan_strip(adapter, vf, false);
+
+	/* If in promiscuous mode we need to make sure the PF also has
+	 * the VLAN filter set.
+	 */
+	if (add && (adapter->netdev->flags & IFF_PROMISC))
+		err = igb_vlvf_set(adapter, vid, add,
+				   adapter->vfs_allocated_count);
+	if (err)
+		goto out;
+
+	err = igb_vlvf_set(adapter, vid, add, vf);
+
+	if (err)
+		goto out;
+
+	/* Go through all the checks to see if the VLAN filter should
+	 * be wiped completely.
+	 */
+	if (!add && (adapter->netdev->flags & IFF_PROMISC)) {
+		u32 vlvf, bits;
+
+		int regndx = igb_find_vlvf_entry(adapter, vid);
+		if (regndx < 0)
+			goto out;
+		/* See if any other pools are set for this VLAN filter
+		 * entry other than the PF.
+		 */
+		vlvf = bits = E1000_READ_REG(hw, E1000_VLVF(regndx));
+		bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT +
+			      adapter->vfs_allocated_count);
+		/* If the filter was removed then ensure PF pool bit
+		 * is cleared if the PF only added itself to the pool
+		 * because the PF is in promiscuous mode.
+		 */
+		if ((vlvf & VLAN_VID_MASK) == vid &&
+#ifndef HAVE_VLAN_RX_REGISTER
+		    !test_bit(vid, adapter->active_vlans) &&
+#endif
+		    !bits)
+			igb_vlvf_set(adapter, vid, add,
+				     adapter->vfs_allocated_count);
+	}
+
+out:
+	return err;
+}
+
+static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* clear flags except flag that the PF has set the MAC */
+	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
+	adapter->vf_data[vf].last_nack = jiffies;
+
+	/* reset offloads to defaults */
+	igb_set_vmolr(adapter, vf, true);
+
+	/* reset vlans for device */
+	igb_clear_vf_vfta(adapter, vf);
+#ifdef IFLA_VF_MAX
+	if (adapter->vf_data[vf].pf_vlan)
+		igb_ndo_set_vf_vlan(adapter->netdev, vf,
+				    adapter->vf_data[vf].pf_vlan,
+				    adapter->vf_data[vf].pf_qos);
+	else
+		igb_clear_vf_vfta(adapter, vf);
+#endif
+
+	/* reset multicast table array for vf */
+	adapter->vf_data[vf].num_vf_mc_hashes = 0;
+
+	/* Flush and reset the mta with the new values */
+	igb_set_rx_mode(adapter->netdev);
+
+	/*
+	 * Reset the VFs TDWBAL and TDWBAH registers which are not
+	 * cleared by a VFLR
+	 */
+	E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0);
+	E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0);
+	if (hw->mac.type == e1000_82576) {
+		E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0);
+		E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0);
+	}
+}
+
+static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
+{
+	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+
+	/* generate a new mac address as we were hotplug removed/added */
+	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
+		random_ether_addr(vf_mac);
+
+	/* process remaining reset events */
+	igb_vf_reset(adapter, vf);
+}
+
+static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+	u32 reg, msgbuf[3];
+	u8 *addr = (u8 *)(&msgbuf[1]);
+
+	/* process all the same items cleared in a function level reset */
+	igb_vf_reset(adapter, vf);
+
+	/* set vf mac address */
+	igb_del_mac_filter(adapter, vf_mac, vf);
+	igb_add_mac_filter(adapter, vf_mac, vf);
+
+	/* enable transmit and receive for vf */
+	reg = E1000_READ_REG(hw, E1000_VFTE);
+	E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf));
+	reg = E1000_READ_REG(hw, E1000_VFRE);
+	E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf));
+
+	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
+
+	/* reply to reset with ack and vf mac address */
+	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
+	memcpy(addr, vf_mac, 6);
+	e1000_write_mbx(hw, msgbuf, 3, vf);
+}
+
+static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
+{
+	/*
+	 * The VF MAC Address is stored in a packed array of bytes
+	 * starting at the second 32 bit word of the msg array
+	 */
+	unsigned char *addr = (unsigned char *)&msg[1];
+	int err = -1;
+
+	if (is_valid_ether_addr(addr))
+		err = igb_set_vf_mac(adapter, vf, addr);
+
+	return err;
+}
+
+static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	u32 msg = E1000_VT_MSGTYPE_NACK;
+
+	/* if device isn't clear to send it shouldn't be reading either */
+	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
+	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
+		e1000_write_mbx(hw, &msg, 1, vf);
+		vf_data->last_nack = jiffies;
+	}
+}
+
+static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	u32 msgbuf[E1000_VFMAILBOX_SIZE];
+	struct e1000_hw *hw = &adapter->hw;
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	s32 retval;
+
+	retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
+
+	if (retval) {
+		dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n");
+		return;
+	}
+
+	/* this is a message we already processed, do nothing */
+	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
+		return;
+
+	/*
+	 * until the vf completes a reset it should not be
+	 * allowed to start any configuration.
+	 */
+
+	if (msgbuf[0] == E1000_VF_RESET) {
+		igb_vf_reset_msg(adapter, vf);
+		return;
+	}
+
+	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
+		msgbuf[0] = E1000_VT_MSGTYPE_NACK;
+		if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
+			e1000_write_mbx(hw, msgbuf, 1, vf);
+			vf_data->last_nack = jiffies;
+		}
+		return;
+	}
+
+	switch ((msgbuf[0] & 0xFFFF)) {
+	case E1000_VF_SET_MAC_ADDR:
+		retval = -EINVAL;
+#ifndef IGB_DISABLE_VF_MAC_SET
+		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
+			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
+		else
+			DPRINTK(DRV, INFO,
+				"VF %d attempted to override administratively "
+				"set MAC address\nReload the VF driver to "
+				"resume operations\n", vf);
+#endif
+		break;
+	case E1000_VF_SET_PROMISC:
+		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
+		break;
+	case E1000_VF_SET_MULTICAST:
+		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
+		break;
+	case E1000_VF_SET_LPE:
+		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
+		break;
+	case E1000_VF_SET_VLAN:
+		retval = -1;
+#ifdef IFLA_VF_MAX
+		if (vf_data->pf_vlan)
+			DPRINTK(DRV, INFO,
+				"VF %d attempted to override administratively "
+				"set VLAN tag\nReload the VF driver to "
+				"resume operations\n", vf);
+		else
+#endif
+			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
+		break;
+	default:
+		dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]);
+		retval = -E1000_ERR_MBX;
+		break;
+	}
+
+	/* notify the VF of the results of what it sent us */
+	if (retval)
+		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
+	else
+		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
+
+	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
+
+	e1000_write_mbx(hw, msgbuf, 1, vf);
+}
+
+static void igb_msg_task(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vf;
+
+	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+		/* process any reset requests */
+		if (!e1000_check_for_rst(hw, vf))
+			igb_vf_reset_event(adapter, vf);
+
+		/* process any messages pending */
+		if (!e1000_check_for_msg(hw, vf))
+			igb_rcv_msg_from_vf(adapter, vf);
+
+		/* process any acks */
+		if (!e1000_check_for_ack(hw, vf))
+			igb_rcv_ack_from_vf(adapter, vf);
+	}
+}
+
+/**
+ *  igb_set_uta - Set unicast filter table address
+ *  @adapter: board private structure
+ *
+ *  The unicast table address is a register array of 32-bit registers.
+ *  The table is meant to be used in a way similar to how the MTA is used
+ *  however due to certain limitations in the hardware it is necessary to
+ *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
+ *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
+ **/
+static void igb_set_uta(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	/* The UTA table only exists on 82576 hardware and newer */
+	if (hw->mac.type < e1000_82576)
+		return;
+
+	/* we only need to do this if VMDq is enabled */
+	if (!adapter->vmdq_pools)
+		return;
+
+	for (i = 0; i < hw->mac.uta_reg_count; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0);
+}
+
+/**
+ * igb_intr_msi - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t igb_intr_msi(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct igb_q_vector *q_vector = adapter->q_vector[0];
+	struct e1000_hw *hw = &adapter->hw;
+	/* read ICR disables interrupts using IAM */
+	u32 icr = E1000_READ_REG(hw, E1000_ICR);
+
+	igb_write_itr(q_vector);
+
+	if (icr & E1000_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & E1000_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+		hw->mac.get_link_status = 1;
+		if (!test_bit(__IGB_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+#ifdef HAVE_PTP_1588_CLOCK
+	if (icr & E1000_ICR_TS) {
+		u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
+
+		if (tsicr & E1000_TSICR_TXTS) {
+			/* acknowledge the interrupt */
+			E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
+			/* retrieve hardware timestamp */
+			schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * igb_intr - Legacy Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t igb_intr(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct igb_q_vector *q_vector = adapter->q_vector[0];
+	struct e1000_hw *hw = &adapter->hw;
+	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
+	 * need for the IMC write */
+	u32 icr = E1000_READ_REG(hw, E1000_ICR);
+
+	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+	 * not set, then the adapter didn't send an interrupt */
+	if (!(icr & E1000_ICR_INT_ASSERTED))
+		return IRQ_NONE;
+
+	igb_write_itr(q_vector);
+
+	if (icr & E1000_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & E1000_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+		hw->mac.get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGB_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+#ifdef HAVE_PTP_1588_CLOCK
+	if (icr & E1000_ICR_TS) {
+		u32 tsicr = E1000_READ_REG(hw, E1000_TSICR);
+
+		if (tsicr & E1000_TSICR_TXTS) {
+			/* acknowledge the interrupt */
+			E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS);
+			/* retrieve hardware timestamp */
+			schedule_work(&adapter->ptp_tx_work);
+		}
+	}
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+void igb_ring_irq_enable(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
+			igb_set_itr(q_vector);
+		else
+			igb_update_ring_itr(q_vector);
+	}
+
+	if (!test_bit(__IGB_DOWN, &adapter->state)) {
+		if (adapter->msix_entries)
+			E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value);
+		else
+			igb_irq_enable(adapter);
+	}
+}
+
+/**
+ * igb_poll - NAPI Rx polling callback
+ * @napi: napi polling structure
+ * @budget: count of how many packets we should handle
+ **/
+static int igb_poll(struct napi_struct *napi, int budget)
+{
+	struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi);
+	bool clean_complete = true;
+
+#ifdef IGB_DCA
+	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
+		igb_update_dca(q_vector);
+#endif
+	if (q_vector->tx.ring)
+		clean_complete = igb_clean_tx_irq(q_vector);
+
+	if (q_vector->rx.ring)
+		clean_complete &= igb_clean_rx_irq(q_vector, budget);
+
+#ifndef HAVE_NETDEV_NAPI_LIST
+	/* if netdev is disabled we need to stop polling */
+	if (!netif_running(q_vector->adapter->netdev))
+		clean_complete = true;
+
+#endif
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* If not enough Rx work done, exit the polling mode */
+	napi_complete(napi);
+	igb_ring_irq_enable(q_vector);
+
+	return 0;
+}
+
+/**
+ * igb_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: pointer to q_vector containing needed info
+ * returns TRUE if ring is completely cleaned
+ **/
+static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct igb_ring *tx_ring = q_vector->tx.ring;
+	struct igb_tx_buffer *tx_buffer;
+	union e1000_adv_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+
+	if (test_bit(__IGB_DOWN, &adapter->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IGB_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+		                 dma_unmap_addr(tx_buffer, dma),
+		                 dma_unmap_len(tx_buffer, len),
+		                 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buffer->skb = NULL;
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* clear last DMA location and unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+				               dma_unmap_addr(tx_buffer, dma),
+				               dma_unmap_len(tx_buffer, len),
+				               DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IGB_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	tx_ring->tx_stats.bytes += total_bytes;
+	tx_ring->tx_stats.packets += total_packets;
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+#ifdef DEBUG
+	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags) &&
+	    !(adapter->disable_hw_reset && adapter->tx_hang_detected)) {
+#else
+	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
+#endif
+		struct e1000_hw *hw = &adapter->hw;
+
+		/* Detect a transmit hang in hardware, this serializes the
+		 * check with the clearing of time_stamp and movement of i */
+		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+		if (tx_buffer->next_to_watch &&
+		    time_after(jiffies, tx_buffer->time_stamp +
+		               (adapter->tx_timeout_factor * HZ))
+		    && !(E1000_READ_REG(hw, E1000_STATUS) &
+		         E1000_STATUS_TXOFF)) {
+
+			/* detected Tx unit hang */
+#ifdef DEBUG
+			adapter->tx_hang_detected = TRUE;
+			if (adapter->disable_hw_reset) {
+				DPRINTK(DRV, WARNING,
+					"Deactivating netdev watchdog timer\n");
+				if (del_timer(&netdev_ring(tx_ring)->watchdog_timer))
+					dev_put(netdev_ring(tx_ring));
+#ifndef HAVE_NET_DEVICE_OPS
+				netdev_ring(tx_ring)->tx_timeout = NULL;
+#endif
+			}
+#endif /* DEBUG */
+			dev_err(tx_ring->dev,
+				"Detected Tx Unit Hang\n"
+				"  Tx Queue             <%d>\n"
+				"  TDH                  <%x>\n"
+				"  TDT                  <%x>\n"
+				"  next_to_use          <%x>\n"
+				"  next_to_clean        <%x>\n"
+				"buffer_info[next_to_clean]\n"
+				"  time_stamp           <%lx>\n"
+				"  next_to_watch        <%p>\n"
+				"  jiffies              <%lx>\n"
+				"  desc.status          <%x>\n",
+				tx_ring->queue_index,
+				E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)),
+				readl(tx_ring->tail),
+				tx_ring->next_to_use,
+				tx_ring->next_to_clean,
+				tx_buffer->time_stamp,
+				tx_buffer->next_to_watch,
+				jiffies,
+				tx_buffer->next_to_watch->wb.status);
+			if (netif_is_multiqueue(netdev_ring(tx_ring)))
+				netif_stop_subqueue(netdev_ring(tx_ring),
+						    ring_queue_index(tx_ring));
+			else
+				netif_stop_queue(netdev_ring(tx_ring));
+
+			/* we are about to reset, no point in enabling stuff */
+			return true;
+		}
+	}
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	if (unlikely(total_packets &&
+		     netif_carrier_ok(netdev_ring(tx_ring)) &&
+		     igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (netif_is_multiqueue(netdev_ring(tx_ring))) {
+			if (__netif_subqueue_stopped(netdev_ring(tx_ring),
+						     ring_queue_index(tx_ring)) &&
+			    !(test_bit(__IGB_DOWN, &adapter->state))) {
+				netif_wake_subqueue(netdev_ring(tx_ring),
+						    ring_queue_index(tx_ring));
+				tx_ring->tx_stats.restart_queue++;
+			}
+		} else {
+			if (netif_queue_stopped(netdev_ring(tx_ring)) &&
+			    !(test_bit(__IGB_DOWN, &adapter->state))) {
+				netif_wake_queue(netdev_ring(tx_ring));
+				tx_ring->tx_stats.restart_queue++;
+			}
+		}
+	}
+
+	return !!budget;
+}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+/**
+ * igb_receive_skb - helper function to handle rx indications
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: packet to send up
+ **/
+static void igb_receive_skb(struct igb_q_vector *q_vector,
+                            struct sk_buff *skb)
+{
+	struct vlan_group **vlgrp = netdev_priv(skb->dev);
+
+	if (IGB_CB(skb)->vid) {
+		if (*vlgrp) {
+			vlan_gro_receive(&q_vector->napi, *vlgrp,
+					 IGB_CB(skb)->vid, skb);
+		} else {
+			dev_kfree_skb_any(skb);
+		}
+	} else {
+		napi_gro_receive(&q_vector->napi, skb);
+	}
+}
+
+#endif /* HAVE_VLAN_RX_REGISTER */
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+/**
+ * igb_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void igb_reuse_rx_page(struct igb_ring *rx_ring,
+			      struct igb_rx_buffer *old_buff)
+{
+	struct igb_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer));
+
+	/* sync the buffer for use by the device */
+	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
+					 old_buff->page_offset,
+					 IGB_RX_BUFSZ,
+					 DMA_FROM_DEVICE);
+}
+
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+				  struct page *page,
+				  unsigned int truesize)
+{
+	/* avoid re-using remote pages */
+	if (unlikely(page_to_nid(page) != numa_node_id()))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buffer->page_offset ^= IGB_RX_BUFSZ;
+
+#else
+	/* move offset up to the next cache line */
+	rx_buffer->page_offset += truesize;
+
+	if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
+		return false;
+#endif
+
+	/* bump ref count on page before it is given to the stack */
+	get_page(page);
+
+	return true;
+}
+
+/**
+ * igb_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool igb_add_rx_frag(struct igb_ring *rx_ring,
+			    struct igb_rx_buffer *rx_buffer,
+			    union e1000_adv_rx_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+	struct page *page = rx_buffer->page;
+	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = IGB_RX_BUFSZ;
+#else
+	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+
+	if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
+		unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+#ifdef HAVE_PTP_1588_CLOCK
+		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+			igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+			va += IGB_TS_HDR_LEN;
+			size -= IGB_TS_HDR_LEN;
+		}
+#endif /* HAVE_PTP_1588_CLOCK */
+
+		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+		/* we can reuse buffer as-is, just make sure it is local */
+		if (likely(page_to_nid(page) == numa_node_id()))
+			return true;
+
+		/* this page cannot be reused so discard it */
+		put_page(page);
+		return false;
+	}
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+			rx_buffer->page_offset, size, truesize);
+
+	return igb_can_reuse_rx_page(rx_buffer, page, truesize);
+}
+
+static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
+					   union e1000_adv_rx_desc *rx_desc,
+					   struct sk_buff *skb)
+{
+	struct igb_rx_buffer *rx_buffer;
+	struct page *page;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+
+	page = rx_buffer->page;
+	prefetchw(page);
+
+	if (likely(!skb)) {
+		void *page_addr = page_address(page) +
+				  rx_buffer->page_offset;
+
+		/* prefetch first cache line of first page */
+		prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+		prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+		/* allocate a skb to store the frags */
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						IGB_RX_HDR_LEN);
+		if (unlikely(!skb)) {
+			rx_ring->rx_stats.alloc_failed++;
+			return NULL;
+		}
+
+		/*
+		 * we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+	}
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      IGB_RX_BUFSZ,
+				      DMA_FROM_DEVICE);
+
+	/* pull page into skb */
+	if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+		/* hand second half of page back to the ring */
+		igb_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+
+	return skb;
+}
+
+#endif
+static inline void igb_rx_checksum(struct igb_ring *ring,
+				   union e1000_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Ignore Checksum bit is set */
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
+		return;
+
+	/* Rx checksum disabled via ethtool */
+	if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM))
+		return;
+
+	/* TCP/UDP checksum error bit is set */
+	if (igb_test_staterr(rx_desc,
+			     E1000_RXDEXT_STATERR_TCPE |
+			     E1000_RXDEXT_STATERR_IPE)) {
+		/*
+		 * work around errata with sctp packets where the TCPE aka
+		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+		 * packets, (aka let the stack check the crc32c)
+		 */
+		if (!((skb->len == 60) &&
+		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags)))
+			ring->rx_stats.csum_err++;
+
+		/* let the stack verify checksum errors */
+		return;
+	}
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
+				      E1000_RXD_STAT_UDPCS))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
+#ifdef NETIF_F_RXHASH
+static inline void igb_rx_hash(struct igb_ring *ring,
+			       union e1000_adv_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	if (netdev_ring(ring)->features & NETIF_F_RXHASH)
+		skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+			     PKT_HASH_TYPE_L3);
+}
+
+#endif
+#ifndef IGB_NO_LRO
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+/**
+ * igb_merge_active_tail - merge active tail into lro skb
+ * @tail: pointer to active tail in frag_list
+ *
+ * This function merges the length and data of an active tail into the
+ * skb containing the frag_list.  It resets the tail's pointer to the head,
+ * but it leaves the heads pointer to tail intact.
+ **/
+static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail)
+{
+	struct sk_buff *head = IGB_CB(tail)->head;
+
+	if (!head)
+		return tail;
+
+	head->len += tail->len;
+	head->data_len += tail->len;
+	head->truesize += tail->len;
+
+	IGB_CB(tail)->head = NULL;
+
+	return head;
+}
+
+/**
+ * igb_add_active_tail - adds an active tail into the skb frag_list
+ * @head: pointer to the start of the skb
+ * @tail: pointer to active tail to add to frag_list
+ *
+ * This function adds an active tail to the end of the frag list.  This tail
+ * will still be receiving data so we cannot yet ad it's stats to the main
+ * skb.  That is done via igb_merge_active_tail.
+ **/
+static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail)
+{
+	struct sk_buff *old_tail = IGB_CB(head)->tail;
+
+	if (old_tail) {
+		igb_merge_active_tail(old_tail);
+		old_tail->next = tail;
+	} else {
+		skb_shinfo(head)->frag_list = tail;
+	}
+
+	IGB_CB(tail)->head = head;
+	IGB_CB(head)->tail = tail;
+
+	IGB_CB(head)->append_cnt++;
+}
+
+/**
+ * igb_close_active_frag_list - cleanup pointers on a frag_list skb
+ * @head: pointer to head of an active frag list
+ *
+ * This function will clear the frag_tail_tracker pointer on an active
+ * frag_list and returns true if the pointer was actually set
+ **/
+static inline bool igb_close_active_frag_list(struct sk_buff *head)
+{
+	struct sk_buff *tail = IGB_CB(head)->tail;
+
+	if (!tail)
+		return false;
+
+	igb_merge_active_tail(tail);
+
+	IGB_CB(head)->tail = NULL;
+
+	return true;
+}
+
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled
+ * @adapter: board private structure
+ * @rx_desc: pointer to the rx descriptor
+ * @skb: pointer to the skb to be merged
+ *
+ **/
+static inline bool igb_can_lro(struct igb_ring *rx_ring,
+			       union e1000_adv_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+
+	/* verify hardware indicates this is IPv4/TCP */
+	if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) ||
+	    !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))))
+		return false;
+
+	/* .. and LRO is enabled */
+	if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO))
+		return false;
+
+	/* .. and we are not in promiscuous mode */
+	if (netdev_ring(rx_ring)->flags & IFF_PROMISC)
+		return false;
+
+	/* .. and the header is large enough for us to read IP/TCP fields */
+	if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr)))
+		return false;
+
+	/* .. and there are no VLANs on packet */
+	if (skb->protocol != __constant_htons(ETH_P_IP))
+		return false;
+
+	/* .. and we are version 4 with no options */
+	if (*(u8 *)iph != 0x45)
+		return false;
+
+	/* .. and the packet is not fragmented */
+	if (iph->frag_off & htons(IP_MF | IP_OFFSET))
+		return false;
+
+	/* .. and that next header is TCP */
+	if (iph->protocol != IPPROTO_TCP)
+		return false;
+
+	return true;
+}
+
+static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb)
+{
+	return (struct igb_lrohdr *)skb->data;
+}
+
+/**
+ * igb_lro_flush - Indicate packets to upper layer.
+ *
+ * Update IP and TCP header part of head skb if more than one
+ * skb's chained and indicate packets to upper layer.
+ **/
+static void igb_lro_flush(struct igb_q_vector *q_vector,
+			  struct sk_buff *skb)
+{
+	struct igb_lro_list *lrolist = &q_vector->lrolist;
+
+	__skb_unlink(skb, &lrolist->active);
+
+	if (IGB_CB(skb)->append_cnt) {
+		struct igb_lrohdr *lroh = igb_lro_hdr(skb);
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		/* close any active lro contexts */
+		igb_close_active_frag_list(skb);
+
+#endif
+		/* incorporate ip header and re-calculate checksum */
+		lroh->iph.tot_len = ntohs(skb->len);
+		lroh->iph.check = 0;
+
+		/* header length is 5 since we know no options exist */
+		lroh->iph.check = ip_fast_csum((u8 *)lroh, 5);
+
+		/* clear TCP checksum to indicate we are an LRO frame */
+		lroh->th.check = 0;
+
+		/* incorporate latest timestamp into the tcp header */
+		if (IGB_CB(skb)->tsecr) {
+			lroh->ts[2] = IGB_CB(skb)->tsecr;
+			lroh->ts[1] = htonl(IGB_CB(skb)->tsval);
+		}
+#ifdef NETIF_F_GSO
+
+		skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss;
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+#endif
+	}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+	igb_receive_skb(q_vector, skb);
+#else
+	napi_gro_receive(&q_vector->napi, skb);
+#endif
+	lrolist->stats.flushed++;
+}
+
+static void igb_lro_flush_all(struct igb_q_vector *q_vector)
+{
+	struct igb_lro_list *lrolist = &q_vector->lrolist;
+	struct sk_buff *skb, *tmp;
+
+	skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp)
+		igb_lro_flush(q_vector, skb);
+}
+
+/*
+ * igb_lro_header_ok - Main LRO function.
+ **/
+static void igb_lro_header_ok(struct sk_buff *skb)
+{
+	struct igb_lrohdr *lroh = igb_lro_hdr(skb);
+	u16 opt_bytes, data_len;
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+	IGB_CB(skb)->tail = NULL;
+#endif
+	IGB_CB(skb)->tsecr = 0;
+	IGB_CB(skb)->append_cnt = 0;
+	IGB_CB(skb)->mss = 0;
+
+	/* ensure that the checksum is valid */
+	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+		return;
+
+	/* If we see CE codepoint in IP header, packet is not mergeable */
+	if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph)))
+		return;
+
+	/* ensure no bits set besides ack or psh */
+	if (lroh->th.fin || lroh->th.syn || lroh->th.rst ||
+	    lroh->th.urg || lroh->th.ece || lroh->th.cwr ||
+	    !lroh->th.ack)
+		return;
+
+	/* store the total packet length */
+	data_len = ntohs(lroh->iph.tot_len);
+
+	/* remove any padding from the end of the skb */
+	__pskb_trim(skb, data_len);
+
+	/* remove header length from data length */
+	data_len -= sizeof(struct igb_lrohdr);
+
+	/*
+	 * check for timestamps. Since the only option we handle are timestamps,
+	 * we only have to handle the simple case of aligned timestamps
+	 */
+	opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr);
+	if (opt_bytes != 0) {
+		if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) ||
+		    !pskb_may_pull(skb, sizeof(struct igb_lrohdr) +
+					TCPOLEN_TSTAMP_ALIGNED) ||
+		    (lroh->ts[0] != htonl((TCPOPT_NOP << 24) |
+					     (TCPOPT_NOP << 16) |
+					     (TCPOPT_TIMESTAMP << 8) |
+					      TCPOLEN_TIMESTAMP)) ||
+		    (lroh->ts[2] == 0)) {
+			return;
+		}
+
+		IGB_CB(skb)->tsval = ntohl(lroh->ts[1]);
+		IGB_CB(skb)->tsecr = lroh->ts[2];
+
+		data_len -= TCPOLEN_TSTAMP_ALIGNED;
+	}
+
+	/* record data_len as mss for the packet */
+	IGB_CB(skb)->mss = data_len;
+	IGB_CB(skb)->next_seq = ntohl(lroh->th.seq);
+}
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+static void igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb)
+{
+	struct skb_shared_info *sh_info;
+	struct skb_shared_info *new_skb_info;
+	unsigned int data_len;
+
+	sh_info = skb_shinfo(lro_skb);
+	new_skb_info = skb_shinfo(new_skb);
+
+	/* copy frags into the last skb */
+	memcpy(sh_info->frags + sh_info->nr_frags,
+	       new_skb_info->frags,
+	       new_skb_info->nr_frags * sizeof(skb_frag_t));
+
+	/* copy size data over */
+	sh_info->nr_frags += new_skb_info->nr_frags;
+	data_len = IGB_CB(new_skb)->mss;
+	lro_skb->len += data_len;
+	lro_skb->data_len += data_len;
+	lro_skb->truesize += data_len;
+
+	/* wipe record of data from new_skb */
+	new_skb_info->nr_frags = 0;
+	new_skb->len = new_skb->data_len = 0;
+	dev_kfree_skb_any(new_skb);
+}
+
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_lro_receive - if able, queue skb into lro chain
+ * @q_vector: structure containing interrupt and ring information
+ * @new_skb: pointer to current skb being checked
+ *
+ * Checks whether the skb given is eligible for LRO and if that's
+ * fine chains it to the existing lro_skb based on flowid. If an LRO for
+ * the flow doesn't exist create one.
+ **/
+static void igb_lro_receive(struct igb_q_vector *q_vector,
+			    struct sk_buff *new_skb)
+{
+	struct sk_buff *lro_skb;
+	struct igb_lro_list *lrolist = &q_vector->lrolist;
+	struct igb_lrohdr *lroh = igb_lro_hdr(new_skb);
+	__be32 saddr = lroh->iph.saddr;
+	__be32 daddr = lroh->iph.daddr;
+	__be32 tcp_ports = *(__be32 *)&lroh->th;
+	u16 data_len;
+#ifdef HAVE_VLAN_RX_REGISTER
+	u16 vid = IGB_CB(new_skb)->vid;
+#else
+	u16 vid = new_skb->vlan_tci;
+#endif
+
+	igb_lro_header_ok(new_skb);
+
+	/*
+	 * we have a packet that might be eligible for LRO,
+	 * so see if it matches anything we might expect
+	 */
+	skb_queue_walk(&lrolist->active, lro_skb) {
+		if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports ||
+		    igb_lro_hdr(lro_skb)->iph.saddr != saddr ||
+		    igb_lro_hdr(lro_skb)->iph.daddr != daddr)
+			continue;
+
+#ifdef HAVE_VLAN_RX_REGISTER
+		if (IGB_CB(lro_skb)->vid != vid)
+#else
+		if (lro_skb->vlan_tci != vid)
+#endif
+			continue;
+
+		/* out of order packet */
+		if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) {
+			igb_lro_flush(q_vector, lro_skb);
+			IGB_CB(new_skb)->mss = 0;
+			break;
+		}
+
+		/* TCP timestamp options have changed */
+		if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) {
+			igb_lro_flush(q_vector, lro_skb);
+			break;
+		}
+
+		/* make sure timestamp values are increasing */
+		if (IGB_CB(lro_skb)->tsecr &&
+		    IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) {
+			igb_lro_flush(q_vector, lro_skb);
+			IGB_CB(new_skb)->mss = 0;
+			break;
+		}
+
+		data_len = IGB_CB(new_skb)->mss;
+
+		/* Check for all of the above below
+		 *   malformed header
+		 *   no tcp data
+		 *   resultant packet would be too large
+		 *   new skb is larger than our current mss
+		 *   data would remain in header
+		 *   we would consume more frags then the sk_buff contains
+		 *   ack sequence numbers changed
+		 *   window size has changed
+		 */
+		if (data_len == 0 ||
+		    data_len > IGB_CB(lro_skb)->mss ||
+		    data_len > IGB_CB(lro_skb)->free ||
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		    data_len != new_skb->data_len ||
+		    skb_shinfo(new_skb)->nr_frags >=
+		    (MAX_SKB_FRAGS - skb_shinfo(lro_skb)->nr_frags) ||
+#endif
+		    igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq ||
+		    igb_lro_hdr(lro_skb)->th.window != lroh->th.window) {
+			igb_lro_flush(q_vector, lro_skb);
+			break;
+		}
+
+		/* Remove IP and TCP header*/
+		skb_pull(new_skb, new_skb->len - data_len);
+
+		/* update timestamp and timestamp echo response */
+		IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval;
+		IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr;
+
+		/* update sequence and free space */
+		IGB_CB(lro_skb)->next_seq += data_len;
+		IGB_CB(lro_skb)->free -= data_len;
+
+		/* update append_cnt */
+		IGB_CB(lro_skb)->append_cnt++;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		/* if header is empty pull pages into current skb */
+		igb_merge_frags(lro_skb, new_skb);
+#else
+		/* chain this new skb in frag_list */
+		igb_add_active_tail(lro_skb, new_skb);
+#endif
+
+		if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh ||
+		    skb_shinfo(lro_skb)->nr_frags == MAX_SKB_FRAGS) {
+			igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh;
+			igb_lro_flush(q_vector, lro_skb);
+		}
+
+		lrolist->stats.coal++;
+		return;
+	}
+
+	if (IGB_CB(new_skb)->mss && !lroh->th.psh) {
+		/* if we are at capacity flush the tail */
+		if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) {
+			lro_skb = skb_peek_tail(&lrolist->active);
+			if (lro_skb)
+				igb_lro_flush(q_vector, lro_skb);
+		}
+
+		/* update sequence and free space */
+		IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss;
+		IGB_CB(new_skb)->free = 65521 - new_skb->len;
+
+		/* .. and insert at the front of the active list */
+		__skb_queue_head(&lrolist->active, new_skb);
+
+		lrolist->stats.coal++;
+		return;
+	}
+
+	/* packet not handled by any of the above, pass it to the stack */
+#ifdef HAVE_VLAN_RX_REGISTER
+	igb_receive_skb(q_vector, new_skb);
+#else
+	napi_gro_receive(&q_vector->napi, new_skb);
+#endif
+}
+
+#endif /* IGB_NO_LRO */
+/**
+ * igb_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+static void igb_process_skb_fields(struct igb_ring *rx_ring,
+				   union e1000_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	struct net_device *dev = rx_ring->netdev;
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+
+#ifdef NETIF_F_RXHASH
+	igb_rx_hash(rx_ring, rx_desc, skb);
+
+#endif
+	igb_rx_checksum(rx_ring, rx_desc, skb);
+
+    /* update packet type stats */
+	if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))
+		rx_ring->rx_stats.ipv4_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4_EX))
+		rx_ring->rx_stats.ipv4e_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6))
+		rx_ring->rx_stats.ipv6_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6_EX))
+		rx_ring->rx_stats.ipv6e_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP))
+		rx_ring->rx_stats.tcp_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_UDP))
+		rx_ring->rx_stats.udp_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_SCTP))
+		rx_ring->rx_stats.sctp_packets++;
+	else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_NFS))
+		rx_ring->rx_stats.nfs_packets++;
+
+#ifdef HAVE_PTP_1588_CLOCK
+	igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+#else
+	if ((dev->features & NETIF_F_HW_VLAN_RX) &&
+#endif
+	    igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
+		u16 vid = 0;
+		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
+		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
+			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
+		else
+			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+#ifdef HAVE_VLAN_RX_REGISTER
+		IGB_CB(skb)->vid = vid;
+	} else {
+		IGB_CB(skb)->vid = 0;
+#else
+
+#ifdef HAVE_VLAN_PROTOCOL
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+#else
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+#endif
+
+
+#endif
+	}
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	skb->protocol = eth_type_trans(skb, dev);
+}
+
+/**
+ * igb_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool igb_is_non_eop(struct igb_ring *rx_ring,
+			   union e1000_adv_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IGB_RX_DESC(rx_ring, ntc));
+
+	if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
+		return false;
+
+	return true;
+}
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+/* igb_clean_rx_irq -- * legacy */
+static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
+{
+	struct igb_ring *rx_ring = q_vector->rx.ring;
+	unsigned int total_bytes = 0, total_packets = 0;
+	u16 cleaned_count = igb_desc_unused(rx_ring);
+
+	do {
+		struct igb_rx_buffer *rx_buffer;
+		union e1000_adv_rx_desc *rx_desc;
+		struct sk_buff *skb;
+		u16 ntc;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
+			igb_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		ntc = rx_ring->next_to_clean;
+		rx_desc = IGB_RX_DESC(rx_ring, ntc);
+		rx_buffer = &rx_ring->rx_buffer_info[ntc];
+
+		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+			break;
+
+		/*
+		 * This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * RXD_STAT_DD bit is set
+		 */
+		rmb();
+
+		skb = rx_buffer->skb;
+
+		prefetch(skb->data);
+
+		/* pull the header of the skb in */
+		__skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
+
+		/* clear skb reference in buffer info structure */
+		rx_buffer->skb = NULL;
+
+		cleaned_count++;
+
+		BUG_ON(igb_is_non_eop(rx_ring, rx_desc));
+
+		dma_unmap_single(rx_ring->dev, rx_buffer->dma,
+				 rx_ring->rx_buffer_len,
+				 DMA_FROM_DEVICE);
+		rx_buffer->dma = 0;
+
+		if (igb_test_staterr(rx_desc,
+				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		total_bytes += skb->len;
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+#ifndef IGB_NO_LRO
+		if (igb_can_lro(rx_ring, rx_desc, skb))
+			igb_lro_receive(q_vector, skb);
+		else
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+			igb_receive_skb(q_vector, skb);
+#else
+			napi_gro_receive(&q_vector->napi, skb);
+#endif
+
+#ifndef NETIF_F_GRO
+		netdev_ring(rx_ring)->last_rx = jiffies;
+
+#endif
+		/* update budget accounting */
+		total_packets++;
+	} while (likely(total_packets < budget));
+
+	rx_ring->rx_stats.packets += total_packets;
+	rx_ring->rx_stats.bytes += total_bytes;
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	if (cleaned_count)
+		igb_alloc_rx_buffers(rx_ring, cleaned_count);
+
+#ifndef IGB_NO_LRO
+	igb_lro_flush_all(q_vector);
+
+#endif /* IGB_NO_LRO */
+	return total_packets < budget;
+}
+#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_get_headlen - determine size of header for LRO/GRO
+ * @data: pointer to the start of the headers
+ * @max_len: total length of section to find headers in
+ *
+ * This function is meant to determine the length of headers that will
+ * be recognized by hardware for LRO, and GRO offloads.  The main
+ * motivation of doing this is to only perform one pull for IPv4 TCP
+ * packets so that we can do basic things like calculating the gso_size
+ * based on the average data per packet.
+ **/
+static unsigned int igb_get_headlen(unsigned char *data,
+				    unsigned int max_len)
+{
+	union {
+		unsigned char *network;
+		/* l2 headers */
+		struct ethhdr *eth;
+		struct vlan_hdr *vlan;
+		/* l3 headers */
+		struct iphdr *ipv4;
+		struct ipv6hdr *ipv6;
+	} hdr;
+	__be16 protocol;
+	u8 nexthdr = 0;	/* default to not TCP */
+	u8 hlen;
+
+	/* this should never happen, but better safe than sorry */
+	if (max_len < ETH_HLEN)
+		return max_len;
+
+	/* initialize network frame pointer */
+	hdr.network = data;
+
+	/* set first protocol and move network header forward */
+	protocol = hdr.eth->h_proto;
+	hdr.network += ETH_HLEN;
+
+	/* handle any vlan tag if present */
+	if (protocol == __constant_htons(ETH_P_8021Q)) {
+		if ((hdr.network - data) > (max_len - VLAN_HLEN))
+			return max_len;
+
+		protocol = hdr.vlan->h_vlan_encapsulated_proto;
+		hdr.network += VLAN_HLEN;
+	}
+
+	/* handle L3 protocols */
+	if (protocol == __constant_htons(ETH_P_IP)) {
+		if ((hdr.network - data) > (max_len - sizeof(struct iphdr)))
+			return max_len;
+
+		/* access ihl as a u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[0] & 0x0F) << 2;
+
+		/* verify hlen meets minimum size requirements */
+		if (hlen < sizeof(struct iphdr))
+			return hdr.network - data;
+
+		/* record next protocol if header is present */
+		if (!(hdr.ipv4->frag_off & htons(IP_OFFSET)))
+			nexthdr = hdr.ipv4->protocol;
+#ifdef NETIF_F_TSO6
+	} else if (protocol == __constant_htons(ETH_P_IPV6)) {
+		if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr)))
+			return max_len;
+
+		/* record next protocol */
+		nexthdr = hdr.ipv6->nexthdr;
+		hlen = sizeof(struct ipv6hdr);
+#endif /* NETIF_F_TSO6 */
+	} else {
+		return hdr.network - data;
+	}
+
+	/* relocate pointer to start of L4 header */
+	hdr.network += hlen;
+
+	/* finally sort out TCP */
+	if (nexthdr == IPPROTO_TCP) {
+		if ((hdr.network - data) > (max_len - sizeof(struct tcphdr)))
+			return max_len;
+
+		/* access doff as a u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[12] & 0xF0) >> 2;
+
+		/* verify hlen meets minimum size requirements */
+		if (hlen < sizeof(struct tcphdr))
+			return hdr.network - data;
+
+		hdr.network += hlen;
+	} else if (nexthdr == IPPROTO_UDP) {
+		if ((hdr.network - data) > (max_len - sizeof(struct udphdr)))
+			return max_len;
+
+		hdr.network += sizeof(struct udphdr);
+	}
+
+	/*
+	 * If everything has gone correctly hdr.network should be the
+	 * data section of the packet and will be the end of the header.
+	 * If not then it probably represents the end of the last recognized
+	 * header.
+	 */
+	if ((hdr.network - data) < max_len)
+		return hdr.network - data;
+	else
+		return max_len;
+}
+
+/**
+ * igb_pull_tail - igb specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an igb specific version of __pskb_pull_tail.  The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void igb_pull_tail(struct igb_ring *rx_ring,
+			  union e1000_adv_rx_desc *rx_desc,
+			  struct sk_buff *skb)
+{
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned char *va;
+	unsigned int pull_len;
+
+	/*
+	 * it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+#ifdef HAVE_PTP_1588_CLOCK
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+		/* retrieve timestamp from buffer */
+		igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+
+		/* update pointers to remove timestamp header */
+		skb_frag_size_sub(frag, IGB_TS_HDR_LEN);
+		frag->page_offset += IGB_TS_HDR_LEN;
+		skb->data_len -= IGB_TS_HDR_LEN;
+		skb->len -= IGB_TS_HDR_LEN;
+
+		/* move va to start of packet data */
+		va += IGB_TS_HDR_LEN;
+	}
+#endif /* HAVE_PTP_1588_CLOCK */
+
+	/*
+	 * we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * igb_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool igb_cleanup_headers(struct igb_ring *rx_ring,
+				union e1000_adv_rx_desc *rx_desc,
+				struct sk_buff *skb)
+{
+
+	if (unlikely((igb_test_staterr(rx_desc,
+				       E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
+		struct net_device *netdev = rx_ring->netdev;
+		if (!(netdev->features & NETIF_F_RXALL)) {
+			dev_kfree_skb_any(skb);
+			return true;
+		}
+	}
+
+	/* place header in linear portion of buffer */
+	if (skb_is_nonlinear(skb))
+		igb_pull_tail(rx_ring, rx_desc, skb);
+
+	/* if skb_pad returns an error the skb was freed */
+	if (unlikely(skb->len < 60)) {
+		int pad_len = 60 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return true;
+		__skb_put(skb, pad_len);
+	}
+
+	return false;
+}
+
+/* igb_clean_rx_irq -- * packet split */
+static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
+{
+	struct igb_ring *rx_ring = q_vector->rx.ring;
+	struct sk_buff *skb = rx_ring->skb;
+	unsigned int total_bytes = 0, total_packets = 0;
+	u16 cleaned_count = igb_desc_unused(rx_ring);
+
+	do {
+		union e1000_adv_rx_desc *rx_desc;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
+			igb_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+			break;
+
+		/*
+		 * This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * RXD_STAT_DD bit is set
+		 */
+		rmb();
+
+		/* retrieve a buffer from the ring */
+		skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
+
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (igb_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_bytes += skb->len;
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+#ifndef IGB_NO_LRO
+		if (igb_can_lro(rx_ring, rx_desc, skb))
+			igb_lro_receive(q_vector, skb);
+		else
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+			igb_receive_skb(q_vector, skb);
+#else
+			napi_gro_receive(&q_vector->napi, skb);
+#endif
+#ifndef NETIF_F_GRO
+
+		netdev_ring(rx_ring)->last_rx = jiffies;
+#endif
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	} while (likely(total_packets < budget));
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	rx_ring->rx_stats.packets += total_packets;
+	rx_ring->rx_stats.bytes += total_bytes;
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	if (cleaned_count)
+		igb_alloc_rx_buffers(rx_ring, cleaned_count);
+
+#ifndef IGB_NO_LRO
+	igb_lro_flush_all(q_vector);
+
+#endif /* IGB_NO_LRO */
+	return total_packets < budget;
+}
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
+				 struct igb_rx_buffer *bi)
+{
+	struct sk_buff *skb = bi->skb;
+	dma_addr_t dma = bi->dma;
+
+	if (dma)
+		return true;
+
+	if (likely(!skb)) {
+		skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
+						rx_ring->rx_buffer_len);
+		bi->skb = skb;
+		if (!skb) {
+			rx_ring->rx_stats.alloc_failed++;
+			return false;
+		}
+
+		/* initialize skb for ring */
+		skb_record_rx_queue(skb, ring_queue_index(rx_ring));
+	}
+
+	dma = dma_map_single(rx_ring->dev, skb->data,
+			     rx_ring->rx_buffer_len, DMA_FROM_DEVICE);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		dev_kfree_skb_any(skb);
+		bi->skb = NULL;
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	return true;
+}
+
+#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
+				  struct igb_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/*
+	 * if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_page(page);
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = 0;
+
+	return true;
+}
+
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+/**
+ * igb_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @adapter: address of board private structure
+ **/
+void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
+{
+	union e1000_adv_rx_desc *rx_desc;
+	struct igb_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = IGB_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	do {
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		if (!igb_alloc_mapped_skb(rx_ring, bi))
+#else
+		if (!igb_alloc_mapped_page(rx_ring, bi))
+#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
+			break;
+
+		/*
+		 * Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+#else
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+#endif
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IGB_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the hdr_addr for the next_to_use descriptor */
+		rx_desc->read.hdr_addr = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+#endif
+		/*
+		 * Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+}
+
+#ifdef SIOCGMIIPHY
+/**
+ * igb_mii_ioctl -
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ **/
+static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	if (adapter->hw.phy.media_type != e1000_media_type_copper)
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = adapter->hw.phy.addr;
+		break;
+	case SIOCGMIIREG:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
+				   &data->val_out))
+			return -EIO;
+		break;
+	case SIOCSMIIREG:
+	default:
+		return -EOPNOTSUPP;
+	}
+	return E1000_SUCCESS;
+}
+
+#endif
+/**
+ * igb_ioctl -
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ **/
+static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+#ifdef SIOCGMIIPHY
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSMIIREG:
+		return igb_mii_ioctl(netdev, ifr, cmd);
+#endif
+#ifdef HAVE_PTP_1588_CLOCK
+	case SIOCSHWTSTAMP:
+		return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd);
+#endif /* HAVE_PTP_1588_CLOCK */
+#ifdef ETHTOOL_OPS_COMPAT
+	case SIOCETHTOOL:
+		return ethtool_ioctl(ifr);
+#endif
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+	struct igb_adapter *adapter = hw->back;
+	u16 cap_offset;
+
+	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+	if (!cap_offset)
+		return -E1000_ERR_CONFIG;
+
+	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+
+	return E1000_SUCCESS;
+}
+
+s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+	struct igb_adapter *adapter = hw->back;
+	u16 cap_offset;
+
+	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+	if (!cap_offset)
+		return -E1000_ERR_CONFIG;
+
+	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+
+	return E1000_SUCCESS;
+}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp)
+#else
+void igb_vlan_mode(struct net_device *netdev, u32 features)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl, rctl;
+	int i;
+#ifdef HAVE_VLAN_RX_REGISTER
+	bool enable = !!vlgrp;
+
+	igb_irq_disable(adapter);
+
+	adapter->vlgrp = vlgrp;
+
+	if (!test_bit(__IGB_DOWN, &adapter->state))
+		igb_irq_enable(adapter);
+#else
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+#else
+	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
+#endif
+#endif
+
+	if (enable) {
+		/* enable VLAN tag insert/strip */
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		ctrl |= E1000_CTRL_VME;
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+		/* Disable CFI check */
+		rctl = E1000_READ_REG(hw, E1000_RCTL);
+		rctl &= ~E1000_RCTL_CFIEN;
+		E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+	} else {
+		/* disable VLAN tag insert/strip */
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		ctrl &= ~E1000_CTRL_VME;
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+	}
+
+#ifndef CONFIG_IGB_VMDQ_NETDEV
+	for (i = 0; i < adapter->vmdq_pools; i++) {
+		igb_set_vf_vlan_strip(adapter,
+				      adapter->vfs_allocated_count + i,
+				      enable);
+	}
+
+#else
+	igb_set_vf_vlan_strip(adapter,
+			      adapter->vfs_allocated_count,
+			      enable);
+
+	for (i = 1; i < adapter->vmdq_pools; i++) {
+#ifdef HAVE_VLAN_RX_REGISTER
+		struct igb_vmdq_adapter *vadapter;
+		vadapter = netdev_priv(adapter->vmdq_netdev[i-1]);
+		enable = !!vadapter->vlgrp;
+#else
+		struct net_device *vnetdev;
+		vnetdev = adapter->vmdq_netdev[i-1];
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+		enable = !!(vnetdev->features & NETIF_F_HW_VLAN_CTAG_RX);
+#else
+		enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX);
+#endif
+#endif
+		igb_set_vf_vlan_strip(adapter,
+				      adapter->vfs_allocated_count + i,
+				      enable);
+	}
+
+#endif
+	igb_rlpml_set(adapter);
+}
+
+#ifdef HAVE_VLAN_PROTOCOL
+static int igb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
+#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+static int igb_vlan_rx_add_vid(struct net_device *netdev,
+			       __always_unused __be16 proto, u16 vid)
+#else
+static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+#endif
+#else
+static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int pf_id = adapter->vfs_allocated_count;
+
+	/* attempt to add filter to vlvf array */
+	igb_vlvf_set(adapter, vid, TRUE, pf_id);
+
+	/* add the filter since PF can receive vlans w/o entry in vlvf */
+	igb_vfta_set(adapter, vid, TRUE);
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+
+	/* Copy feature flags from netdev to the vlan netdev for this vid.
+	 * This allows things like TSO to bubble down to our vlan device.
+	 * There is no need to update netdev for vlan 0 (DCB), since it
+	 * wouldn't has v_netdev.
+	 */
+	if (adapter->vlgrp) {
+		struct vlan_group *vlgrp = adapter->vlgrp;
+		struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid);
+		if (v_netdev) {
+			v_netdev->features |= netdev->features;
+			vlan_group_set_device(vlgrp, vid, v_netdev);
+		}
+	}
+#endif
+#ifndef HAVE_VLAN_RX_REGISTER
+
+	set_bit(vid, adapter->active_vlans);
+#endif
+#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
+	return 0;
+#endif
+}
+
+#ifdef HAVE_VLAN_PROTOCOL
+static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
+#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+static int igb_vlan_rx_kill_vid(struct net_device *netdev,
+				__always_unused __be16 proto, u16 vid)
+#else
+static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+#endif
+#else
+static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+#endif
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int pf_id = adapter->vfs_allocated_count;
+	s32 err;
+
+#ifdef HAVE_VLAN_RX_REGISTER
+	igb_irq_disable(adapter);
+
+	vlan_group_set_device(adapter->vlgrp, vid, NULL);
+
+	if (!test_bit(__IGB_DOWN, &adapter->state))
+		igb_irq_enable(adapter);
+
+#endif /* HAVE_VLAN_RX_REGISTER */
+	/* remove vlan from VLVF table array */
+	err = igb_vlvf_set(adapter, vid, FALSE, pf_id);
+
+	/* if vid was not present in VLVF just remove it from table */
+	if (err)
+		igb_vfta_set(adapter, vid, FALSE);
+#ifndef HAVE_VLAN_RX_REGISTER
+
+	clear_bit(vid, adapter->active_vlans);
+#endif
+#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
+	return 0;
+#endif
+}
+
+static void igb_restore_vlan(struct igb_adapter *adapter)
+{
+#ifdef HAVE_VLAN_RX_REGISTER
+	igb_vlan_mode(adapter->netdev, adapter->vlgrp);
+
+	if (adapter->vlgrp) {
+		u16 vid;
+		for (vid = 0; vid < VLAN_N_VID; vid++) {
+			if (!vlan_group_get_device(adapter->vlgrp, vid))
+				continue;
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+			igb_vlan_rx_add_vid(adapter->netdev,
+					    htons(ETH_P_8021Q), vid);
+#else
+			igb_vlan_rx_add_vid(adapter->netdev, vid);
+#endif
+		}
+	}
+#else
+	u16 vid;
+
+	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
+
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+		igb_vlan_rx_add_vid(adapter->netdev,
+				    htons(ETH_P_8021Q), vid);
+#else
+		igb_vlan_rx_add_vid(adapter->netdev, vid);
+#endif
+#endif
+}
+
+int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+
+	mac->autoneg = 0;
+
+	/* SerDes device's does not support 10Mbps Full/duplex
+	 * and 100Mbps Half duplex
+	 */
+	if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
+		switch (spddplx) {
+		case SPEED_10 + DUPLEX_HALF:
+		case SPEED_10 + DUPLEX_FULL:
+		case SPEED_100 + DUPLEX_HALF:
+			dev_err(pci_dev_to_dev(pdev),
+				"Unsupported Speed/Duplex configuration\n");
+			return -EINVAL;
+		default:
+			break;
+		}
+	}
+
+	switch (spddplx) {
+	case SPEED_10 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_10_HALF;
+		break;
+	case SPEED_10 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_10_FULL;
+		break;
+	case SPEED_100 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_100_HALF;
+		break;
+	case SPEED_100 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_100_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_FULL:
+		mac->autoneg = 1;
+		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_HALF: /* not supported */
+	default:
+		dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n");
+		return -EINVAL;
+	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
+	return 0;
+}
+
+static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
+			  bool runtime)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl, rctl, status;
+	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
+#ifdef CONFIG_PM
+	int retval = 0;
+#endif
+
+	netif_device_detach(netdev);
+
+	status = E1000_READ_REG(hw, E1000_STATUS);
+	if (status & E1000_STATUS_LU)
+		wufc &= ~E1000_WUFC_LNKC;
+
+	if (netif_running(netdev))
+		__igb_close(netdev, true);
+
+	igb_clear_interrupt_scheme(adapter);
+
+#ifdef CONFIG_PM
+	retval = pci_save_state(pdev);
+	if (retval)
+		return retval;
+#endif
+
+	if (wufc) {
+		igb_setup_rctl(adapter);
+		igb_set_rx_mode(netdev);
+
+		/* turn on all-multi mode if wake on multicast is enabled */
+		if (wufc & E1000_WUFC_MC) {
+			rctl = E1000_READ_REG(hw, E1000_RCTL);
+			rctl |= E1000_RCTL_MPE;
+			E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+		}
+
+		ctrl = E1000_READ_REG(hw, E1000_CTRL);
+		/* phy power management enable */
+		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
+		ctrl |= E1000_CTRL_ADVD3WUC;
+		E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
+
+		/* Allow time for pending master requests to run */
+		e1000_disable_pcie_master(hw);
+
+		E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN);
+		E1000_WRITE_REG(hw, E1000_WUFC, wufc);
+	} else {
+		E1000_WRITE_REG(hw, E1000_WUC, 0);
+		E1000_WRITE_REG(hw, E1000_WUFC, 0);
+	}
+
+	*enable_wake = wufc || adapter->en_mng_pt;
+	if (!*enable_wake)
+		igb_power_down_link(adapter);
+	else
+		igb_power_up_link(adapter);
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant. */
+	igb_release_hw_control(adapter);
+
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_suspend(struct device *dev)
+#else
+static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+{
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+	struct pci_dev *pdev = to_pci_dev(dev);
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+	int retval;
+	bool wake;
+
+	retval = __igb_shutdown(pdev, &wake, 0);
+	if (retval)
+		return retval;
+
+	if (wake) {
+		pci_prepare_to_sleep(pdev);
+	} else {
+		pci_wake_from_d3(pdev, false);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_resume(struct device *dev)
+#else
+static int igb_resume(struct pci_dev *pdev)
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+{
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+	struct pci_dev *pdev = to_pci_dev(dev);
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(pci_dev_to_dev(pdev),
+			"igb: Cannot enable PCI device from suspend\n");
+		return err;
+	}
+	pci_set_master(pdev);
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	if (igb_init_interrupt_scheme(adapter, true)) {
+		dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	igb_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver. */
+	igb_get_hw_control(adapter);
+
+	E1000_WRITE_REG(hw, E1000_WUS, ~0);
+
+	if (netdev->flags & IFF_UP) {
+		rtnl_lock();
+		err = __igb_open(netdev, true);
+		rtnl_unlock();
+		if (err)
+			return err;
+	}
+
+	netif_device_attach(netdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
+static int igb_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (!igb_has_link(adapter))
+		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
+
+	return -EBUSY;
+}
+
+static int igb_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int retval;
+	bool wake;
+
+	retval = __igb_shutdown(pdev, &wake, 1);
+	if (retval)
+		return retval;
+
+	if (wake) {
+		pci_prepare_to_sleep(pdev);
+	} else {
+		pci_wake_from_d3(pdev, false);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+
+	return 0;
+}
+
+static int igb_runtime_resume(struct device *dev)
+{
+	return igb_resume(dev);
+}
+#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
+#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
+
+#ifdef USE_REBOOT_NOTIFIER
+/* only want to do this for 2.4 kernels? */
+static int igb_notify_reboot(struct notifier_block *nb, unsigned long event,
+                             void *p)
+{
+	struct pci_dev *pdev = NULL;
+	bool wake;
+
+	switch (event) {
+	case SYS_DOWN:
+	case SYS_HALT:
+	case SYS_POWER_OFF:
+		while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
+			if (pci_dev_driver(pdev) == &igb_driver) {
+				__igb_shutdown(pdev, &wake, 0);
+				if (event == SYS_POWER_OFF) {
+					pci_wake_from_d3(pdev, wake);
+					pci_set_power_state(pdev, PCI_D3hot);
+				}
+			}
+		}
+	}
+	return NOTIFY_DONE;
+}
+#else
+static void igb_shutdown(struct pci_dev *pdev)
+{
+	bool wake = false;
+
+	__igb_shutdown(pdev, &wake, 0);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, wake);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+#endif /* USE_REBOOT_NOTIFIER */
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/*
+ * Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void igb_netpoll(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_q_vector *q_vector;
+	int i;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		q_vector = adapter->q_vector[i];
+		if (adapter->msix_entries)
+			E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value);
+		else
+			igb_irq_disable(adapter);
+		napi_schedule(&q_vector->napi);
+	}
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+#ifdef HAVE_PCI_ERS
+#define E1000_DEV_ID_82576_VF 0x10CA
+/**
+ * igb_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+					      pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+#ifdef CONFIG_PCI_IOV__UNUSED
+	struct pci_dev *bdev, *vfdev;
+	u32 dw0, dw1, dw2, dw3;
+	int vf, pos;
+	u16 req_id, pf_func;
+
+	if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA))
+		goto skip_bad_vf_detection;
+
+	bdev = pdev->bus->self;
+	while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT))
+		bdev = bdev->bus->self;
+
+	if (!bdev)
+		goto skip_bad_vf_detection;
+
+	pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		goto skip_bad_vf_detection;
+
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
+	pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
+
+	req_id = dw1 >> 16;
+	/* On the 82576 if bit 7 of the requestor ID is set then it's a VF */
+	if (!(req_id & 0x0080))
+		goto skip_bad_vf_detection;
+
+	pf_func = req_id & 0x01;
+	if ((pf_func & 1) == (pdev->devfn & 1)) {
+
+		vf = (req_id & 0x7F) >> 1;
+		dev_err(pci_dev_to_dev(pdev),
+			"VF %d has caused a PCIe error\n", vf);
+		dev_err(pci_dev_to_dev(pdev),
+			"TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
+			"%8.8x\tdw3: %8.8x\n",
+			dw0, dw1, dw2, dw3);
+
+		/* Find the pci device of the offending VF */
+		vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+				       E1000_DEV_ID_82576_VF, NULL);
+		while (vfdev) {
+			if (vfdev->devfn == (req_id & 0xFF))
+				break;
+			vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+					       E1000_DEV_ID_82576_VF, vfdev);
+		}
+		/*
+		 * There's a slim chance the VF could have been hot plugged,
+		 * so if it is no longer present we don't need to issue the
+		 * VFLR.  Just clean up the AER in that case.
+		 */
+		if (vfdev) {
+			dev_err(pci_dev_to_dev(pdev),
+				"Issuing VFLR to VF %d\n", vf);
+			pci_write_config_dword(vfdev, 0xA8, 0x00008000);
+		}
+
+		pci_cleanup_aer_uncorrect_error_status(pdev);
+	}
+
+	/*
+	 * Even though the error may have occurred on the other port
+	 * we still need to increment the vf error reference count for
+	 * both ports because the I/O resume function will be called
+	 * for both of them.
+	 */
+	adapter->vferr_refcount++;
+
+	return PCI_ERS_RESULT_RECOVERED;
+
+skip_bad_vf_detection:
+#endif /* CONFIG_PCI_IOV */
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (netif_running(netdev))
+		igb_down(adapter);
+	pci_disable_device(pdev);
+
+	/* Request a slot slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * igb_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the igb_resume routine.
+ */
+static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	pci_ers_result_t result;
+
+	if (pci_enable_device_mem(pdev)) {
+		dev_err(pci_dev_to_dev(pdev),
+			"Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+
+		pci_enable_wake(pdev, PCI_D3hot, 0);
+		pci_enable_wake(pdev, PCI_D3cold, 0);
+
+		schedule_work(&adapter->reset_task);
+		E1000_WRITE_REG(hw, E1000_WUS, ~0);
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	return result;
+}
+
+/**
+ * igb_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the igb_resume routine.
+ */
+static void igb_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->vferr_refcount) {
+		dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n");
+		adapter->vferr_refcount--;
+		return;
+	}
+
+	if (netif_running(netdev)) {
+		if (igb_up(adapter)) {
+			dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver. */
+	igb_get_hw_control(adapter);
+}
+
+#endif /* HAVE_PCI_ERS */
+
+int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return 0;
+
+	for (i = 0; i < hw->mac.rar_entry_count; i++) {
+		if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
+			continue;
+		adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED |
+						   IGB_MAC_STATE_IN_USE);
+		memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
+		adapter->mac_table[i].queue = queue;
+		igb_sync_mac_table(adapter);
+		return 0;
+	}
+	return -ENOMEM;
+}
+int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue)
+{
+	/* search table for addr, if found, set to 0 and sync */
+	int i;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (is_zero_ether_addr(addr))
+		return 0;
+	for (i = 0; i < hw->mac.rar_entry_count; i++) {
+		if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
+		    adapter->mac_table[i].queue == queue) {
+			adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED;
+			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+			adapter->mac_table[i].queue = 0;
+			igb_sync_mac_table(adapter);
+			return 0;
+		}
+	}
+	return -ENOMEM;
+}
+static int igb_set_vf_mac(struct igb_adapter *adapter,
+                          int vf, unsigned char *mac_addr)
+{
+	igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf);
+	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
+
+	igb_add_mac_filter(adapter, mac_addr, vf);
+
+	return 0;
+}
+
+#ifdef IFLA_VF_MAX
+static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
+		return -EINVAL;
+	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
+	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
+	dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
+				      " change effective.\n");
+	if (test_bit(__IGB_DOWN, &adapter->state)) {
+		dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
+			 " but the PF device is not up.\n");
+		dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
+			 " attempting to use the VF device.\n");
+	}
+	return igb_set_vf_mac(adapter, vf, mac);
+}
+
+static int igb_link_mbps(int internal_link_speed)
+{
+	switch (internal_link_speed) {
+	case SPEED_100:
+		return 100;
+	case SPEED_1000:
+		return 1000;
+	case SPEED_2500:
+		return 2500;
+	default:
+		return 0;
+	}
+}
+
+static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
+			int link_speed)
+{
+	int rf_dec, rf_int;
+	u32 bcnrc_val;
+
+	if (tx_rate != 0) {
+		/* Calculate the rate factor values to set */
+		rf_int = link_speed / tx_rate;
+		rf_dec = (link_speed - (rf_int * tx_rate));
+		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
+
+		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
+		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
+				E1000_RTTBCNRC_RF_INT_MASK);
+		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
+	} else {
+		bcnrc_val = 0;
+	}
+
+	E1000_WRITE_REG(hw, E1000_RTTDQSEL, vf); /* vf X uses queue X */
+	/*
+	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
+	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
+	 */
+	E1000_WRITE_REG(hw, E1000_RTTBCNRM(0), 0x14);
+	E1000_WRITE_REG(hw, E1000_RTTBCNRC, bcnrc_val);
+}
+
+static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
+{
+	int actual_link_speed, i;
+	bool reset_rate = false;
+
+	/* VF TX rate limit was not set */
+	if ((adapter->vf_rate_link_speed == 0) ||
+		(adapter->hw.mac.type != e1000_82576))
+		return;
+
+	actual_link_speed = igb_link_mbps(adapter->link_speed);
+	if (actual_link_speed != adapter->vf_rate_link_speed) {
+		reset_rate = true;
+		adapter->vf_rate_link_speed = 0;
+		dev_info(&adapter->pdev->dev,
+		"Link speed has been changed. VF Transmit rate is disabled\n");
+	}
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		if (reset_rate)
+			adapter->vf_data[i].tx_rate = 0;
+
+		igb_set_vf_rate_limit(&adapter->hw, i,
+			adapter->vf_data[i].tx_rate, actual_link_speed);
+	}
+}
+
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			     int tx_rate)
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int actual_link_speed;
+
+	if (hw->mac.type != e1000_82576)
+		return -EOPNOTSUPP;
+
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+	if (min_tx_rate)
+		return -EINVAL;
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+
+	actual_link_speed = igb_link_mbps(adapter->link_speed);
+	if ((vf >= adapter->vfs_allocated_count) ||
+		(!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) ||
+		(tx_rate < 0) || (tx_rate > actual_link_speed))
+		return -EINVAL;
+
+	adapter->vf_rate_link_speed = actual_link_speed;
+	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
+	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
+
+	return 0;
+}
+
+static int igb_ndo_get_vf_config(struct net_device *netdev,
+				 int vf, struct ifla_vf_info *ivi)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+	ivi->vf = vf;
+	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
+#ifdef HAVE_VF_MIN_MAX_TXRATE
+	ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->min_tx_rate = 0;
+#else /* HAVE_VF_MIN_MAX_TXRATE */
+	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
+#endif /* HAVE_VF_MIN_MAX_TXRATE */
+	ivi->vlan = adapter->vf_data[vf].pf_vlan;
+	ivi->qos = adapter->vf_data[vf].pf_qos;
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+#endif
+	return 0;
+}
+#endif
+static void igb_vmm_control(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int count;
+	u32 reg;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+	default:
+		/* replication is not supported for 82575 */
+		return;
+	case e1000_82576:
+		/* notify HW that the MAC is adding vlan tags */
+		reg = E1000_READ_REG(hw, E1000_DTXCTL);
+		reg |= (E1000_DTXCTL_VLAN_ADDED |
+			E1000_DTXCTL_SPOOF_INT);
+		E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
+	case e1000_82580:
+		/* enable replication vlan tag stripping */
+		reg = E1000_READ_REG(hw, E1000_RPLOLR);
+		reg |= E1000_RPLOLR_STRVLAN;
+		E1000_WRITE_REG(hw, E1000_RPLOLR, reg);
+	case e1000_i350:
+	case e1000_i354:
+		/* none of the above registers are supported by i350 */
+		break;
+	}
+
+	/* Enable Malicious Driver Detection */
+	if ((adapter->vfs_allocated_count) &&
+	    (adapter->mdd)) {
+		if (hw->mac.type == e1000_i350)
+			igb_enable_mdd(adapter);
+	}
+
+		/* enable replication and loopback support */
+		count = adapter->vfs_allocated_count || adapter->vmdq_pools;
+		if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE && count)
+			e1000_vmdq_set_loopback_pf(hw, 1);
+		e1000_vmdq_set_anti_spoofing_pf(hw,
+			adapter->vfs_allocated_count || adapter->vmdq_pools,
+			adapter->vfs_allocated_count);
+	e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count ||
+				      adapter->vmdq_pools);
+}
+
+static void igb_init_fw(struct igb_adapter *adapter)
+{
+	struct e1000_fw_drv_info fw_cmd;
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+	u16 mask;
+
+	if (hw->mac.type == e1000_i210)
+		mask = E1000_SWFW_EEP_SM;
+	else
+		mask = E1000_SWFW_PHY0_SM;
+	/* i211 parts do not support this feature */
+	if (hw->mac.type == e1000_i211)
+		hw->mac.arc_subsystem_valid = false;
+
+	if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) {
+		for (i = 0; i <= FW_MAX_RETRIES; i++) {
+			E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI);
+			fw_cmd.hdr.cmd = FW_CMD_DRV_INFO;
+			fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN;
+			fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED;
+			fw_cmd.port_num = hw->bus.func;
+			fw_cmd.drv_version = FW_FAMILY_DRV_VER;
+			fw_cmd.hdr.checksum = 0;
+			fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd,
+			                                           (FW_HDR_LEN +
+			                                            fw_cmd.hdr.buf_len));
+			 e1000_host_interface_command(hw, (u8*)&fw_cmd,
+			                             sizeof(fw_cmd));
+			if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS)
+				break;
+		}
+	} else
+		dev_warn(pci_dev_to_dev(adapter->pdev),
+			 "Unable to get semaphore, firmware init failed.\n");
+	hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 dmac_thr;
+	u16 hwm;
+	u32 status;
+
+	if (hw->mac.type == e1000_i211)
+		return;
+
+	if (hw->mac.type > e1000_82580) {
+		if (adapter->dmac != IGB_DMAC_DISABLE) {
+			u32 reg;
+
+			/* force threshold to 0.  */
+			E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
+
+			/*
+			 * DMA Coalescing high water mark needs to be greater
+			 * than the Rx threshold. Set hwm to PBA - max frame
+			 * size in 16B units, capping it at PBA - 6KB.
+			 */
+			hwm = 64 * pba - adapter->max_frame_size / 16;
+			if (hwm < 64 * (pba - 6))
+				hwm = 64 * (pba - 6);
+			reg = E1000_READ_REG(hw, E1000_FCRTC);
+			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
+			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
+				& E1000_FCRTC_RTH_COAL_MASK);
+			E1000_WRITE_REG(hw, E1000_FCRTC, reg);
+
+			/*
+			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
+			 * frame size, capping it at PBA - 10KB.
+			 */
+			dmac_thr = pba - adapter->max_frame_size / 512;
+			if (dmac_thr < pba - 10)
+				dmac_thr = pba - 10;
+			reg = E1000_READ_REG(hw, E1000_DMACR);
+			reg &= ~E1000_DMACR_DMACTHR_MASK;
+			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
+				& E1000_DMACR_DMACTHR_MASK);
+
+			/* transition to L0x or L1 if available..*/
+			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
+
+			/* Check if status is 2.5Gb backplane connection
+			 * before configuration of watchdog timer, which is
+			 * in msec values in 12.8usec intervals
+			 * watchdog timer= msec values in 32usec intervals
+			 * for non 2.5Gb connection
+			 */
+			if (hw->mac.type == e1000_i354) {
+				status = E1000_READ_REG(hw, E1000_STATUS);
+				if ((status & E1000_STATUS_2P5_SKU) &&
+				    (!(status & E1000_STATUS_2P5_SKU_OVER)))
+					reg |= ((adapter->dmac * 5) >> 6);
+				else
+					reg |= ((adapter->dmac) >> 5);
+			} else {
+				reg |= ((adapter->dmac) >> 5);
+			}
+
+			/*
+			 * Disable BMC-to-OS Watchdog enable
+			 * on devices that support OS-to-BMC
+			 */
+			if (hw->mac.type != e1000_i354)
+				reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
+			E1000_WRITE_REG(hw, E1000_DMACR, reg);
+
+			/* no lower threshold to disable coalescing(smart fifb)-UTRESH=0*/
+			E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
+
+			/* This sets the time to wait before requesting
+			 * transition to low power state to number of usecs
+			 * needed to receive 1 512 byte frame at gigabit
+			 * line rate. On i350 device, time to make transition
+			 * to Lx state is delayed by 4 usec with flush disable
+			 * bit set to avoid losing mailbox interrupts
+			 */
+			reg = E1000_READ_REG(hw, E1000_DMCTLX);
+			if (hw->mac.type == e1000_i350)
+				reg |= IGB_DMCTLX_DCFLUSH_DIS;
+
+			/* in 2.5Gb connection, TTLX unit is 0.4 usec
+			 * which is 0x4*2 = 0xA. But delay is still 4 usec
+			 */
+			if (hw->mac.type == e1000_i354) {
+				status = E1000_READ_REG(hw, E1000_STATUS);
+				if ((status & E1000_STATUS_2P5_SKU) &&
+				    (!(status & E1000_STATUS_2P5_SKU_OVER)))
+					reg |= 0xA;
+				else
+					reg |= 0x4;
+			} else {
+				reg |= 0x4;
+			}
+			E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
+
+			/* free space in tx packet buffer to wake from DMA coal */
+			E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
+				(IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+
+			/* make low power state decision controlled by DMA coal */
+			reg = E1000_READ_REG(hw, E1000_PCIEMISC);
+			reg &= ~E1000_PCIEMISC_LX_DECISION;
+			E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
+		} /* endif adapter->dmac is not disabled */
+	} else if (hw->mac.type == e1000_82580) {
+		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
+		E1000_WRITE_REG(hw, E1000_PCIEMISC,
+		                reg & ~E1000_PCIEMISC_LX_DECISION);
+		E1000_WRITE_REG(hw, E1000_DMACR, 0);
+	}
+}
+
+#ifdef HAVE_I2C_SUPPORT
+/*  igb_read_i2c_byte - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: device address
+ *  @data: value read
+ *
+ *  Performs byte read operation over I2C interface at
+ *  a specified device address.
+ */
+s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data)
+{
+	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
+	struct i2c_client *this_client = adapter->i2c_client;
+	s32 status;
+	u16 swfw_mask = 0;
+
+	if (!this_client)
+		return E1000_ERR_I2C;
+
+	swfw_mask = E1000_SWFW_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
+	    != E1000_SUCCESS)
+		return E1000_ERR_SWFW_SYNC;
+
+	status = i2c_smbus_read_byte_data(this_client, byte_offset);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+	if (status < 0)
+		return E1000_ERR_I2C;
+	else {
+		*data = status;
+		return E1000_SUCCESS;
+	}
+}
+
+/*  igb_write_i2c_byte - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: value to write
+ *
+ *  Performs byte write operation over I2C interface at
+ *  a specified device address.
+ */
+s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data)
+{
+	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
+	struct i2c_client *this_client = adapter->i2c_client;
+	s32 status;
+	u16 swfw_mask = E1000_SWFW_PHY0_SM;
+
+	if (!this_client)
+		return E1000_ERR_I2C;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS)
+		return E1000_ERR_SWFW_SYNC;
+	status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+	if (status)
+		return E1000_ERR_I2C;
+	else
+		return E1000_SUCCESS;
+}
+#endif /*  HAVE_I2C_SUPPORT */
+/* igb_main.c */
+
+
+/**
+ * igb_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igb_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igb_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+int igb_kni_probe(struct pci_dev *pdev,
+			       struct net_device **lad_dev)
+{
+	struct net_device *netdev;
+	struct igb_adapter *adapter;
+	struct e1000_hw *hw;
+	u16 eeprom_data = 0;
+	u8 pba_str[E1000_PBANUM_LENGTH];
+	s32 ret_val;
+	static int global_quad_port_a; /* global quad port a indication */
+	int i, err, pci_using_dac = 0;
+	static int cards_found;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+#ifdef NO_KNI
+	pci_using_dac = 0;
+	err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+	if (!err) {
+		err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
+		if (!err)
+			pci_using_dac = 1;
+	} else {
+		err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+		if (err) {
+			err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
+			if (err) {
+				IGB_ERR("No usable DMA configuration, "
+				        "aborting\n");
+				goto err_dma;
+			}
+		}
+	}
+
+#ifndef HAVE_ASPM_QUIRKS
+	/* 82575 requires that the pci-e link partner disable the L0s state */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82575EB_COPPER:
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+	default:
+		break;
+	}
+
+#endif /* HAVE_ASPM_QUIRKS */
+	err = pci_request_selected_regions(pdev,
+	                                   pci_select_bars(pdev,
+                                                           IORESOURCE_MEM),
+	                                   igb_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_enable_pcie_error_reporting(pdev);
+
+	pci_set_master(pdev);
+
+	err = -ENOMEM;
+#endif /* NO_KNI */
+#ifdef HAVE_TX_MQ
+	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
+	                           IGB_MAX_TX_QUEUES);
+#else
+	netdev = alloc_etherdev(sizeof(struct igb_adapter));
+#endif /* HAVE_TX_MQ */
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_MODULE_OWNER(netdev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	//pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->port_num = hw->bus.func;
+	adapter->msg_enable = (1 << debug) - 1;
+
+#ifdef HAVE_PCI_ERS
+	err = pci_save_state(pdev);
+	if (err)
+		goto err_ioremap;
+#endif
+	err = -EIO;
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+	                      pci_resource_len(pdev, 0));
+	if (!hw->hw_addr)
+		goto err_ioremap;
+
+#ifdef HAVE_NET_DEVICE_OPS
+	netdev->netdev_ops = &igb_netdev_ops;
+#else /* HAVE_NET_DEVICE_OPS */
+	netdev->open = &igb_open;
+	netdev->stop = &igb_close;
+	netdev->get_stats = &igb_get_stats;
+#ifdef HAVE_SET_RX_MODE
+	netdev->set_rx_mode = &igb_set_rx_mode;
+#endif
+	netdev->set_multicast_list = &igb_set_rx_mode;
+	netdev->set_mac_address = &igb_set_mac;
+	netdev->change_mtu = &igb_change_mtu;
+	netdev->do_ioctl = &igb_ioctl;
+#ifdef HAVE_TX_TIMEOUT
+	netdev->tx_timeout = &igb_tx_timeout;
+#endif
+	netdev->vlan_rx_register = igb_vlan_mode;
+	netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
+	netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	netdev->poll_controller = igb_netpoll;
+#endif
+	netdev->hard_start_xmit = &igb_xmit_frame;
+#endif /* HAVE_NET_DEVICE_OPS */
+	igb_set_ethtool_ops(netdev);
+#ifdef HAVE_TX_TIMEOUT
+	netdev->watchdog_timeo = 5 * HZ;
+#endif
+
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+	adapter->bd_number = cards_found;
+
+	/* setup the private structure */
+	err = igb_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	e1000_get_bus_info(hw);
+
+	hw->phy.autoneg_wait_to_complete = FALSE;
+	hw->mac.adaptive_ifs = FALSE;
+
+	/* Copper options */
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		hw->phy.mdix = AUTO_ALL_MODES;
+		hw->phy.disable_polarity_correction = FALSE;
+		hw->phy.ms_type = e1000_ms_hw_default;
+	}
+
+	if (e1000_check_reset_block(hw))
+		dev_info(pci_dev_to_dev(pdev),
+			"PHY reset is blocked due to SOL/IDER session.\n");
+
+	/*
+	 * features is initialized to 0 in allocation, it might have bits
+	 * set by igb_sw_init so we should use an or instead of an
+	 * assignment.
+	 */
+	netdev->features |= NETIF_F_SG |
+			    NETIF_F_IP_CSUM |
+#ifdef NETIF_F_IPV6_CSUM
+			    NETIF_F_IPV6_CSUM |
+#endif
+#ifdef NETIF_F_TSO
+			    NETIF_F_TSO |
+#ifdef NETIF_F_TSO6
+			    NETIF_F_TSO6 |
+#endif
+#endif /* NETIF_F_TSO */
+#ifdef NETIF_F_RXHASH
+			    NETIF_F_RXHASH |
+#endif
+			    NETIF_F_RXCSUM |
+#ifdef NETIF_F_HW_VLAN_CTAG_RX
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX;
+#else
+			    NETIF_F_HW_VLAN_RX |
+			    NETIF_F_HW_VLAN_TX;
+#endif
+
+	if (hw->mac.type >= e1000_82576)
+		netdev->features |= NETIF_F_SCTP_CSUM;
+
+#ifdef HAVE_NDO_SET_FEATURES
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features;
+#ifndef IGB_NO_LRO
+
+	/* give us the option of enabling LRO later */
+	netdev->hw_features |= NETIF_F_LRO;
+#endif
+#else
+#ifdef NETIF_F_GRO
+
+	/* this is only needed on kernels prior to 2.6.39 */
+	netdev->features |= NETIF_F_GRO;
+#endif
+#endif
+
+	/* set this bit last since it cannot be part of hw_features */
+#ifdef NETIF_F_HW_VLAN_CTAG_FILTER
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+#else
+	netdev->features |= NETIF_F_HW_VLAN_FILTER;
+#endif
+
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+	netdev->vlan_features |= NETIF_F_TSO |
+				 NETIF_F_TSO6 |
+				 NETIF_F_IP_CSUM |
+				 NETIF_F_IPV6_CSUM |
+				 NETIF_F_SG;
+
+#endif
+	if (pci_using_dac)
+		netdev->features |= NETIF_F_HIGHDMA;
+
+#ifdef NO_KNI
+	adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
+#ifdef DEBUG
+	if (adapter->dmac != IGB_DMAC_DISABLE)
+		printk("%s: DMA Coalescing is enabled..\n", netdev->name);
+#endif
+
+	/* before reading the NVM, reset the controller to put the device in a
+	 * known good starting state */
+	e1000_reset_hw(hw);
+#endif /* NO_KNI */
+
+	/* make sure the NVM is good */
+	if (e1000_validate_nvm_checksum(hw) < 0) {
+		dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
+		        " Valid\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	/* copy the MAC address out of the NVM */
+	if (e1000_read_mac_addr(hw))
+		dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
+	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+#ifdef ETHTOOL_GPERMADDR
+	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
+
+	if (!is_valid_ether_addr(netdev->perm_addr)) {
+#else
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+#endif
+		dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
+	adapter->mac_table[0].queue = adapter->vfs_allocated_count;
+	adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
+	igb_rar_set(adapter, 0);
+
+	/* get firmware version for ethtool -i */
+	igb_set_fw_version(adapter);
+
+	/* Check if Media Autosense is enabled */
+	if (hw->mac.type == e1000_82580)
+		igb_init_mas(adapter);
+
+#ifdef NO_KNI
+	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
+	            (unsigned long) adapter);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
+			    (unsigned long) adapter);
+	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
+	            (unsigned long) adapter);
+
+	INIT_WORK(&adapter->reset_task, igb_reset_task);
+	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
+#endif
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
+	hw->phy.autoneg_advertised = 0x2f;
+
+	hw->fc.requested_mode = e1000_fc_default;
+	hw->fc.current_mode = e1000_fc_default;
+
+	e1000_validate_mdi_setting(hw);
+
+	/* By default, support wake on port A */
+	if (hw->bus.func == 0)
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+	/* Check the NVM for wake support for non-port A ports */
+	if (hw->mac.type >= e1000_82580)
+		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+		                 &eeprom_data);
+	else if (hw->bus.func == 1)
+		e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
+
+	if (eeprom_data & IGB_EEPROM_APME)
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+	/* now that we have the eeprom settings, apply the special cases where
+	 * the eeprom may be wrong or the board simply won't support wake on
+	 * lan on a particular port */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82576_FIBER:
+	case E1000_DEV_ID_82576_SERDES:
+		/* Wake events only supported on port A for dual fiber
+		 * regardless of eeprom setting */
+		if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	case E1000_DEV_ID_82576_QUAD_COPPER:
+	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+		/* if quad port adapter, disable WoL on all but port A */
+		if (global_quad_port_a != 0)
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		else
+			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+		/* Reset for multiple quad port adapters */
+		if (++global_quad_port_a == 4)
+			global_quad_port_a = 0;
+		break;
+	default:
+		/* If the device can't wake, don't set software support */
+		if (!device_can_wakeup(&adapter->pdev->dev))
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	}
+
+	/* initialize the wol settings based on the eeprom settings */
+	if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
+		adapter->wol |= E1000_WUFC_MAG;
+
+	/* Some vendors want WoL disabled by default, but still supported */
+	if ((hw->mac.type == e1000_i350) &&
+	    (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+		adapter->wol = 0;
+	}
+
+#ifdef NO_KNI
+	device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev),
+				 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
+
+	/* reset the hardware with the new settings */
+	igb_reset(adapter);
+	adapter->devrc = 0;
+
+#ifdef HAVE_I2C_SUPPORT
+	/* Init the I2C interface */
+	err = igb_init_i2c(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "failed to init i2c interface\n");
+		goto err_eeprom;
+	}
+#endif /* HAVE_I2C_SUPPORT */
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver. */
+	igb_get_hw_control(adapter);
+
+	strncpy(netdev->name, "eth%d", IFNAMSIZ);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+	err = igb_init_vmdq_netdevs(adapter);
+	if (err)
+		goto err_register;
+#endif
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+#ifdef IGB_DCA
+	if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
+		adapter->flags |= IGB_FLAG_DCA_ENABLED;
+		dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
+		igb_setup_dca(adapter);
+	}
+
+#endif
+#ifdef HAVE_PTP_1588_CLOCK
+	/* do hw tstamp init after resetting */
+	igb_ptp_init(adapter);
+#endif /* HAVE_PTP_1588_CLOCK */
+
+#endif /* NO_KNI */
+	dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
+	/* print bus type/speed/width info */
+	dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
+	         netdev->name,
+	         ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
+	          (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
+		  (hw->mac.type == e1000_i354) ? "integrated" :
+	                                                    "unknown"),
+	         ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+	          (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
+	          (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
+		  (hw->mac.type == e1000_i354) ? "integrated" :
+	           "unknown"));
+	dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
+	for (i = 0; i < 6; i++)
+		printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
+
+	ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
+	if (ret_val)
+		strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
+	dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
+		 pba_str);
+
+
+	/* Initialize the thermal sensor on i350 devices. */
+	if (hw->mac.type == e1000_i350) {
+		if (hw->bus.func == 0) {
+			u16 ets_word;
+
+			/*
+			 * Read the NVM to determine if this i350 device
+			 * supports an external thermal sensor.
+			 */
+			e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
+			if (ets_word != 0x0000 && ets_word != 0xFFFF)
+				adapter->ets = true;
+			else
+				adapter->ets = false;
+		}
+#ifdef NO_KNI
+#ifdef IGB_HWMON
+
+		igb_sysfs_init(adapter);
+#else
+#ifdef IGB_PROCFS
+
+		igb_procfs_init(adapter);
+#endif /* IGB_PROCFS */
+#endif /* IGB_HWMON */
+#endif /* NO_KNI */
+	} else {
+		adapter->ets = false;
+	}
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		switch (hw->mac.type) {
+		case e1000_i350:
+		case e1000_i210:
+		case e1000_i211:
+			/* Enable EEE for internal copper PHY devices */
+			err = e1000_set_eee_i350(hw);
+			if ((!err) &&
+			    (adapter->flags & IGB_FLAG_EEE))
+				adapter->eee_advert =
+					MDIO_EEE_100TX | MDIO_EEE_1000T;
+			break;
+		case e1000_i354:
+			if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) &
+			    (E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+				err = e1000_set_eee_i354(hw);
+				if ((!err) &&
+				    (adapter->flags & IGB_FLAG_EEE))
+					adapter->eee_advert =
+					   MDIO_EEE_100TX | MDIO_EEE_1000T;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* send driver version info to firmware */
+	if (hw->mac.type >= e1000_i350)
+		igb_init_fw(adapter);
+
+#ifndef IGB_NO_LRO
+	if (netdev->features & NETIF_F_LRO)
+		dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n");
+	else
+		dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n");
+#endif
+	dev_info(pci_dev_to_dev(pdev),
+	         "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
+	         adapter->msix_entries ? "MSI-X" :
+	         (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
+	         adapter->num_rx_queues, adapter->num_tx_queues);
+
+	cards_found++;
+	*lad_dev = netdev;
+
+	pm_runtime_put_noidle(&pdev->dev);
+	return 0;
+
+//err_register:
+//	igb_release_hw_control(adapter);
+#ifdef HAVE_I2C_SUPPORT
+	memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
+#endif /* HAVE_I2C_SUPPORT */
+err_eeprom:
+//	if (!e1000_check_reset_block(hw))
+//		e1000_phy_hw_reset(hw);
+
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+err_sw_init:
+//	igb_clear_interrupt_scheme(adapter);
+//	igb_reset_sriov_capability(adapter);
+	iounmap(hw->hw_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+//	pci_release_selected_regions(pdev,
+//	                             pci_select_bars(pdev, IORESOURCE_MEM));
+//err_pci_reg:
+//err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+
+void igb_kni_remove(struct pci_dev *pdev)
+{
+	pci_disable_device(pdev);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
new file mode 100644
index 00000000..f79ce7c1
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
@@ -0,0 +1,847 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include <linux/netdevice.h>
+
+#include "igb.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+
+#define IGB_MAX_NIC 32
+
+#define OPTION_UNSET   -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED  1
+#define MAX_NUM_LIST_OPTS 15
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+#define IGB_PARAM_INIT { [0 ... IGB_MAX_NIC] = OPTION_UNSET }
+#ifndef module_param_array
+/* Module Parameters are always initialized to -1, so that the driver
+ * can tell the difference between no user specified value or the
+ * user asking for the default value.
+ * The true default values are loaded in when igb_check_options is called.
+ *
+ * This is a GCC extension to ANSI C.
+ * See the item "Labeled Elements in Initializers" in the section
+ * "Extensions to the C Language Family" of the GCC documentation.
+ */
+
+#define IGB_PARAM(X, desc) \
+	static const int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \
+	MODULE_PARM(X, "1-" __MODULE_STRING(IGB_MAX_NIC) "i"); \
+	MODULE_PARM_DESC(X, desc);
+#else
+#define IGB_PARAM(X, desc) \
+	static int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \
+	static unsigned int num_##X; \
+	module_param_array_named(X, X, int, &num_##X, 0); \
+	MODULE_PARM_DESC(X, desc);
+#endif
+
+/* Interrupt Throttle Rate (interrupts/sec)
+ *
+ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
+ */
+IGB_PARAM(InterruptThrottleRate,
+	  "Maximum interrupts per second, per vector, (max 100000), default 3=adaptive");
+#define DEFAULT_ITR                    3
+#define MAX_ITR                   100000
+/* #define MIN_ITR                      120 */
+#define MIN_ITR                      0
+/* IntMode (Interrupt Mode)
+ *
+ * Valid Range: 0 - 2
+ *
+ * Default Value: 2 (MSI-X)
+ */
+IGB_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2");
+#define MAX_INTMODE                    IGB_INT_MODE_MSIX
+#define MIN_INTMODE                    IGB_INT_MODE_LEGACY
+
+IGB_PARAM(Node, "set the starting node to allocate memory on, default -1");
+
+/* LLIPort (Low Latency Interrupt TCP Port)
+ *
+ * Valid Range: 0 - 65535
+ *
+ * Default Value: 0 (disabled)
+ */
+IGB_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535), default 0=off");
+
+#define DEFAULT_LLIPORT                0
+#define MAX_LLIPORT               0xFFFF
+#define MIN_LLIPORT                    0
+
+/* LLIPush (Low Latency Interrupt on TCP Push flag)
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0 (disabled)
+ */
+IGB_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1), default 0=off");
+
+#define DEFAULT_LLIPUSH                0
+#define MAX_LLIPUSH                    1
+#define MIN_LLIPUSH                    0
+
+/* LLISize (Low Latency Interrupt on Packet Size)
+ *
+ * Valid Range: 0 - 1500
+ *
+ * Default Value: 0 (disabled)
+ */
+IGB_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500), default 0=off");
+
+#define DEFAULT_LLISIZE                0
+#define MAX_LLISIZE                 1500
+#define MIN_LLISIZE                    0
+
+/* RSS (Enable RSS multiqueue receive)
+ *
+ * Valid Range: 0 - 8
+ *
+ * Default Value:  1
+ */
+IGB_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues (0-8), default 1, 0=number of cpus");
+
+#define DEFAULT_RSS       1
+#define MAX_RSS           8
+#define MIN_RSS           0
+
+/* VMDQ (Enable VMDq multiqueue receive)
+ *
+ * Valid Range: 0 - 8
+ *
+ * Default Value:  0
+ */
+IGB_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0-1 = disable, 2-8 enable, default 0");
+
+#define DEFAULT_VMDQ      0
+#define MAX_VMDQ          MAX_RSS
+#define MIN_VMDQ          0
+
+/* max_vfs (Enable SR-IOV VF devices)
+ *
+ * Valid Range: 0 - 7
+ *
+ * Default Value:  0
+ */
+IGB_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable, 1-7 enable, default 0");
+
+#define DEFAULT_SRIOV     0
+#define MAX_SRIOV         7
+#define MIN_SRIOV         0
+
+/* MDD (Enable Malicious Driver Detection)
+ *
+ * Only available when SR-IOV is enabled - max_vfs is greater than 0
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value:  1
+ */
+IGB_PARAM(MDD, "Malicious Driver Detection (0/1), default 1 = enabled. "
+	  "Only available when max_vfs is greater than 0");
+
+#ifdef DEBUG
+
+/* Disable Hardware Reset on Tx Hang
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0 (disabled, i.e. h/w will reset)
+ */
+IGB_PARAM(DisableHwReset, "Disable reset of hardware on Tx hang");
+
+/* Dump Transmit and Receive buffers
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0
+ */
+IGB_PARAM(DumpBuffers, "Dump Tx/Rx buffers on Tx hang or by request");
+
+#endif /* DEBUG */
+
+/* QueuePairs (Enable TX/RX queue pairs for interrupt handling)
+ *
+ * Valid Range: 0 - 1
+ *
+ * Default Value:  1
+ */
+IGB_PARAM(QueuePairs, "Enable Tx/Rx queue pairs for interrupt handling (0,1), default 1=on");
+
+#define DEFAULT_QUEUE_PAIRS           1
+#define MAX_QUEUE_PAIRS               1
+#define MIN_QUEUE_PAIRS               0
+
+/* Enable/disable EEE (a.k.a. IEEE802.3az)
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 1
+ */
+ IGB_PARAM(EEE, "Enable/disable on parts that support the feature");
+
+/* Enable/disable DMA Coalescing
+ *
+ * Valid Values: 0(off), 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
+ * 9000, 10000(msec), 250(usec), 500(usec)
+ *
+ * Default Value: 0
+ */
+ IGB_PARAM(DMAC, "Disable or set latency for DMA Coalescing ((0=off, 1000-10000(msec), 250, 500 (usec))");
+
+#ifndef IGB_NO_LRO
+/* Enable/disable Large Receive Offload
+ *
+ * Valid Values: 0(off), 1(on)
+ *
+ * Default Value: 0
+ */
+ IGB_PARAM(LRO, "Large Receive Offload (0,1), default 0=off");
+
+#endif
+struct igb_opt_list {
+	int i;
+	char *str;
+};
+struct igb_option {
+	enum { enable_option, range_option, list_option } type;
+	const char *name;
+	const char *err;
+	int def;
+	union {
+		struct { /* range_option info */
+			int min;
+			int max;
+		} r;
+		struct { /* list_option info */
+			int nr;
+			struct igb_opt_list *p;
+		} l;
+	} arg;
+};
+
+static int igb_validate_option(unsigned int *value,
+			       struct igb_option *opt,
+			       struct igb_adapter *adapter)
+{
+	if (*value == OPTION_UNSET) {
+		*value = opt->def;
+		return 0;
+	}
+
+	switch (opt->type) {
+	case enable_option:
+		switch (*value) {
+		case OPTION_ENABLED:
+			DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name);
+			return 0;
+		case OPTION_DISABLED:
+			DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name);
+			return 0;
+		}
+		break;
+	case range_option:
+		if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+			DPRINTK(PROBE, INFO,
+					"%s set to %d\n", opt->name, *value);
+			return 0;
+		}
+		break;
+	case list_option: {
+		int i;
+		struct igb_opt_list *ent;
+
+		for (i = 0; i < opt->arg.l.nr; i++) {
+			ent = &opt->arg.l.p[i];
+			if (*value == ent->i) {
+				if (ent->str[0] != '\0')
+					DPRINTK(PROBE, INFO, "%s\n", ent->str);
+				return 0;
+			}
+		}
+	}
+		break;
+	default:
+		BUG();
+	}
+
+	DPRINTK(PROBE, INFO, "Invalid %s value specified (%d) %s\n",
+	       opt->name, *value, opt->err);
+	*value = opt->def;
+	return -1;
+}
+
+/**
+ * igb_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command line parameters for valid user
+ * input.  If an invalid value is given, or if no user specified
+ * value exists, a default value is used.  The final value is stored
+ * in a variable in the adapter structure.
+ **/
+
+void igb_check_options(struct igb_adapter *adapter)
+{
+	int bd = adapter->bd_number;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (bd >= IGB_MAX_NIC) {
+		DPRINTK(PROBE, NOTICE,
+		       "Warning: no configuration for board #%d\n", bd);
+		DPRINTK(PROBE, NOTICE, "Using defaults for all values\n");
+#ifndef module_param_array
+		bd = IGB_MAX_NIC;
+#endif
+	}
+
+	{ /* Interrupt Throttling Rate */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "Interrupt Throttling Rate (ints/sec)",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_ITR),
+			.def  = DEFAULT_ITR,
+			.arg  = { .r = { .min = MIN_ITR,
+					 .max = MAX_ITR } }
+		};
+
+#ifdef module_param_array
+		if (num_InterruptThrottleRate > bd) {
+#endif
+			unsigned int itr = InterruptThrottleRate[bd];
+
+			switch (itr) {
+			case 0:
+				DPRINTK(PROBE, INFO, "%s turned off\n",
+				        opt.name);
+				if (hw->mac.type >= e1000_i350)
+					adapter->dmac = IGB_DMAC_DISABLE;
+				adapter->rx_itr_setting = itr;
+				break;
+			case 1:
+				DPRINTK(PROBE, INFO, "%s set to dynamic mode\n",
+					opt.name);
+				adapter->rx_itr_setting = itr;
+				break;
+			case 3:
+				DPRINTK(PROBE, INFO,
+				        "%s set to dynamic conservative mode\n",
+					opt.name);
+				adapter->rx_itr_setting = itr;
+				break;
+			default:
+				igb_validate_option(&itr, &opt, adapter);
+				/* Save the setting, because the dynamic bits
+				 * change itr.  In case of invalid user value,
+				 * default to conservative mode, else need to
+				 * clear the lower two bits because they are
+				 * used as control */
+				if (itr == 3) {
+					adapter->rx_itr_setting = itr;
+				} else {
+					adapter->rx_itr_setting = 1000000000 /
+					                          (itr * 256);
+					adapter->rx_itr_setting &= ~3;
+				}
+				break;
+			}
+#ifdef module_param_array
+		} else {
+			adapter->rx_itr_setting = opt.def;
+		}
+#endif
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+	}
+	{ /* Interrupt Mode */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "Interrupt Mode",
+			.err  = "defaulting to 2 (MSI-X)",
+			.def  = IGB_INT_MODE_MSIX,
+			.arg  = { .r = { .min = MIN_INTMODE,
+					 .max = MAX_INTMODE } }
+		};
+
+#ifdef module_param_array
+		if (num_IntMode > bd) {
+#endif
+			unsigned int int_mode = IntMode[bd];
+			igb_validate_option(&int_mode, &opt, adapter);
+			adapter->int_mode = int_mode;
+#ifdef module_param_array
+		} else {
+			adapter->int_mode = opt.def;
+		}
+#endif
+	}
+	{ /* Low Latency Interrupt TCP Port */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "Low Latency Interrupt TCP Port",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_LLIPORT),
+			.def  = DEFAULT_LLIPORT,
+			.arg  = { .r = { .min = MIN_LLIPORT,
+					 .max = MAX_LLIPORT } }
+		};
+
+#ifdef module_param_array
+		if (num_LLIPort > bd) {
+#endif
+			adapter->lli_port = LLIPort[bd];
+			if (adapter->lli_port) {
+				igb_validate_option(&adapter->lli_port, &opt,
+				        adapter);
+			} else {
+				DPRINTK(PROBE, INFO, "%s turned off\n",
+					opt.name);
+			}
+#ifdef module_param_array
+		} else {
+			adapter->lli_port = opt.def;
+		}
+#endif
+	}
+	{ /* Low Latency Interrupt on Packet Size */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "Low Latency Interrupt on Packet Size",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_LLISIZE),
+			.def  = DEFAULT_LLISIZE,
+			.arg  = { .r = { .min = MIN_LLISIZE,
+					 .max = MAX_LLISIZE } }
+		};
+
+#ifdef module_param_array
+		if (num_LLISize > bd) {
+#endif
+			adapter->lli_size = LLISize[bd];
+			if (adapter->lli_size) {
+				igb_validate_option(&adapter->lli_size, &opt,
+				        adapter);
+			} else {
+				DPRINTK(PROBE, INFO, "%s turned off\n",
+					opt.name);
+			}
+#ifdef module_param_array
+		} else {
+			adapter->lli_size = opt.def;
+		}
+#endif
+	}
+	{ /* Low Latency Interrupt on TCP Push flag */
+		struct igb_option opt = {
+			.type = enable_option,
+			.name = "Low Latency Interrupt on TCP Push flag",
+			.err  = "defaulting to Disabled",
+			.def  = OPTION_DISABLED
+		};
+
+#ifdef module_param_array
+		if (num_LLIPush > bd) {
+#endif
+			unsigned int lli_push = LLIPush[bd];
+			igb_validate_option(&lli_push, &opt, adapter);
+			adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0;
+#ifdef module_param_array
+		} else {
+			adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0;
+		}
+#endif
+	}
+	{ /* SRIOV - Enable SR-IOV VF devices */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "max_vfs - SR-IOV VF devices",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_SRIOV),
+			.def  = DEFAULT_SRIOV,
+			.arg  = { .r = { .min = MIN_SRIOV,
+					 .max = MAX_SRIOV } }
+		};
+
+#ifdef module_param_array
+		if (num_max_vfs > bd) {
+#endif
+			adapter->vfs_allocated_count = max_vfs[bd];
+			igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter);
+
+#ifdef module_param_array
+		} else {
+			adapter->vfs_allocated_count = opt.def;
+		}
+#endif
+		if (adapter->vfs_allocated_count) {
+			switch (hw->mac.type) {
+			case e1000_82575:
+			case e1000_82580:
+			case e1000_i210:
+			case e1000_i211:
+			case e1000_i354:
+				adapter->vfs_allocated_count = 0;
+				DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n");
+			default:
+				break;
+			}
+		}
+	}
+	{ /* VMDQ - Enable VMDq multiqueue receive */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "VMDQ - VMDq multiqueue queue count",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_VMDQ),
+			.def  = DEFAULT_VMDQ,
+			.arg  = { .r = { .min = MIN_VMDQ,
+					 .max = (MAX_VMDQ - adapter->vfs_allocated_count) } }
+		};
+		if ((hw->mac.type != e1000_i210) ||
+		    (hw->mac.type != e1000_i211)) {
+#ifdef module_param_array
+		if (num_VMDQ > bd) {
+#endif
+			adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]);
+			if (adapter->vfs_allocated_count && !adapter->vmdq_pools) {
+				DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n");
+				adapter->vmdq_pools = 1;
+			}
+			igb_validate_option(&adapter->vmdq_pools, &opt, adapter);
+
+#ifdef module_param_array
+		} else {
+			if (!adapter->vfs_allocated_count)
+				adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def);
+			else
+				adapter->vmdq_pools = 1;
+		}
+#endif
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+		if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) {
+			DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n");
+			adapter->vmdq_pools = 0;
+		}
+#endif
+
+	} else {
+		DPRINTK(PROBE, INFO, "VMDq option is not supported.\n");
+		adapter->vmdq_pools = opt.def;
+	}
+	}
+	{ /* RSS - Enable RSS multiqueue receives */
+		struct igb_option opt = {
+			.type = range_option,
+			.name = "RSS - RSS multiqueue receive count",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_RSS),
+			.def  = DEFAULT_RSS,
+			.arg  = { .r = { .min = MIN_RSS,
+					 .max = MAX_RSS } }
+		};
+
+		switch (hw->mac.type) {
+		case e1000_82575:
+#ifndef CONFIG_IGB_VMDQ_NETDEV
+			if (!!adapter->vmdq_pools) {
+				if (adapter->vmdq_pools <= 2) {
+					if (adapter->vmdq_pools == 2)
+						opt.arg.r.max = 3;
+				} else {
+					opt.arg.r.max = 1;
+				}
+			} else {
+				opt.arg.r.max = 4;
+			}
+#else
+			opt.arg.r.max = !!adapter->vmdq_pools ? 1 : 4;
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+			break;
+		case e1000_i210:
+			opt.arg.r.max = 4;
+			break;
+		case e1000_i211:
+			opt.arg.r.max = 2;
+			break;
+		case e1000_82576:
+#ifndef CONFIG_IGB_VMDQ_NETDEV
+			if (!!adapter->vmdq_pools)
+				opt.arg.r.max = 2;
+			break;
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+		case e1000_82580:
+		case e1000_i350:
+		case e1000_i354:
+		default:
+			if (!!adapter->vmdq_pools)
+				opt.arg.r.max = 1;
+			break;
+		}
+
+		if (adapter->int_mode != IGB_INT_MODE_MSIX) {
+			DPRINTK(PROBE, INFO, "RSS is not supported when in MSI/Legacy Interrupt mode, %s\n",
+				opt.err);
+			opt.arg.r.max = 1;
+		}
+
+#ifdef module_param_array
+		if (num_RSS > bd) {
+#endif
+			adapter->rss_queues = RSS[bd];
+			switch (adapter->rss_queues) {
+			case 1:
+				break;
+			default:
+				igb_validate_option(&adapter->rss_queues, &opt, adapter);
+				if (adapter->rss_queues)
+					break;
+			case 0:
+				adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus());
+				break;
+			}
+#ifdef module_param_array
+		} else {
+			adapter->rss_queues = opt.def;
+		}
+#endif
+	}
+	{ /* QueuePairs - Enable Tx/Rx queue pairs for interrupt handling */
+		struct igb_option opt = {
+			.type = enable_option,
+			.name = "QueuePairs - Tx/Rx queue pairs for interrupt handling",
+			.err  = "defaulting to Enabled",
+			.def  = OPTION_ENABLED
+		};
+#ifdef module_param_array
+		if (num_QueuePairs > bd) {
+#endif
+			unsigned int qp = QueuePairs[bd];
+			/*
+			 * We must enable queue pairs if the number of queues
+			 * exceeds the number of available interrupts. We are
+			 * limited to 10, or 3 per unallocated vf. On I210 and
+			 * I211 devices, we are limited to 5 interrupts.
+			 * However, since I211 only supports 2 queues, we do not
+			 * need to check and override the user option.
+			 */
+			if (qp == OPTION_DISABLED) {
+				if (adapter->rss_queues > 4)
+					qp = OPTION_ENABLED;
+
+				if (adapter->vmdq_pools > 4)
+					qp = OPTION_ENABLED;
+
+				if (adapter->rss_queues > 1 &&
+				    (adapter->vmdq_pools > 3 ||
+				     adapter->vfs_allocated_count > 6))
+					qp = OPTION_ENABLED;
+
+				if (hw->mac.type == e1000_i210 &&
+				    adapter->rss_queues > 2)
+					qp = OPTION_ENABLED;
+
+				if (qp == OPTION_ENABLED)
+					DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n",
+						opt.err);
+			}
+			igb_validate_option(&qp, &opt, adapter);
+			adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0;
+#ifdef module_param_array
+		} else {
+			adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0;
+		}
+#endif
+	}
+	{ /* EEE -  Enable EEE for capable adapters */
+
+		if (hw->mac.type >= e1000_i350) {
+			struct igb_option opt = {
+				.type = enable_option,
+				.name = "EEE Support",
+				.err  = "defaulting to Enabled",
+				.def  = OPTION_ENABLED
+			};
+#ifdef module_param_array
+			if (num_EEE > bd) {
+#endif
+				unsigned int eee = EEE[bd];
+				igb_validate_option(&eee, &opt, adapter);
+				adapter->flags |= eee ? IGB_FLAG_EEE : 0;
+				if (eee)
+					hw->dev_spec._82575.eee_disable = false;
+				else
+					hw->dev_spec._82575.eee_disable = true;
+
+#ifdef module_param_array
+			} else {
+				adapter->flags |= opt.def ? IGB_FLAG_EEE : 0;
+				if (adapter->flags & IGB_FLAG_EEE)
+					hw->dev_spec._82575.eee_disable = false;
+				else
+					hw->dev_spec._82575.eee_disable = true;
+			}
+#endif
+		}
+	}
+	{ /* DMAC -  Enable DMA Coalescing for capable adapters */
+
+		if (hw->mac.type >= e1000_i350) {
+			struct igb_opt_list list [] = {
+				{ IGB_DMAC_DISABLE, "DMAC Disable"},
+				{ IGB_DMAC_MIN, "DMAC 250 usec"},
+				{ IGB_DMAC_500, "DMAC 500 usec"},
+				{ IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"},
+				{ IGB_DMAC_2000, "DMAC 2000 usec"},
+				{ IGB_DMAC_3000, "DMAC 3000 usec"},
+				{ IGB_DMAC_4000, "DMAC 4000 usec"},
+				{ IGB_DMAC_5000, "DMAC 5000 usec"},
+				{ IGB_DMAC_6000, "DMAC 6000 usec"},
+				{ IGB_DMAC_7000, "DMAC 7000 usec"},
+				{ IGB_DMAC_8000, "DMAC 8000 usec"},
+				{ IGB_DMAC_9000, "DMAC 9000 usec"},
+				{ IGB_DMAC_MAX, "DMAC 10000 usec"}
+			};
+			struct igb_option opt = {
+				.type = list_option,
+				.name = "DMA Coalescing",
+				.err  = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE),
+				.def  = IGB_DMAC_DISABLE,
+				.arg = { .l = { .nr = 13,
+					 	.p = list
+					}
+				}
+			};
+#ifdef module_param_array
+			if (num_DMAC > bd) {
+#endif
+				unsigned int dmac = DMAC[bd];
+				if (adapter->rx_itr_setting == IGB_DMAC_DISABLE)
+					dmac = IGB_DMAC_DISABLE;
+				igb_validate_option(&dmac, &opt, adapter);
+				switch (dmac) {
+				case IGB_DMAC_DISABLE:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_MIN:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_500:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_EN_DEFAULT:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_2000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_3000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_4000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_5000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_6000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_7000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_8000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_9000:
+					adapter->dmac = dmac;
+					break;
+				case IGB_DMAC_MAX:
+					adapter->dmac = dmac;
+					break;
+				default:
+					adapter->dmac = opt.def;
+					DPRINTK(PROBE, INFO,
+					"Invalid DMAC setting, "
+					"resetting DMAC to %d\n", opt.def);
+				}
+#ifdef module_param_array
+			} else
+				adapter->dmac = opt.def;
+#endif
+		}
+	}
+#ifndef IGB_NO_LRO
+	{ /* LRO - Enable Large Receive Offload */
+		struct igb_option opt = {
+			.type = enable_option,
+			.name = "LRO - Large Receive Offload",
+			.err  = "defaulting to Disabled",
+			.def  = OPTION_DISABLED
+		};
+		struct net_device *netdev = adapter->netdev;
+#ifdef module_param_array
+		if (num_LRO > bd) {
+#endif
+			unsigned int lro = LRO[bd];
+			igb_validate_option(&lro, &opt, adapter);
+			netdev->features |= lro ? NETIF_F_LRO : 0;
+#ifdef module_param_array
+		} else if (opt.def == OPTION_ENABLED) {
+			netdev->features |= NETIF_F_LRO;
+		}
+#endif
+	}
+#endif /* IGB_NO_LRO */
+	{ /* MDD - Enable Malicious Driver Detection. Only available when
+	     SR-IOV is enabled. */
+		struct igb_option opt = {
+			.type = enable_option,
+			.name = "Malicious Driver Detection",
+			.err  = "defaulting to 1",
+			.def  = OPTION_ENABLED,
+			.arg  = { .r = { .min = OPTION_DISABLED,
+					 .max = OPTION_ENABLED } }
+		};
+
+#ifdef module_param_array
+		if (num_MDD > bd) {
+#endif
+			adapter->mdd = MDD[bd];
+			igb_validate_option((uint *)&adapter->mdd, &opt,
+					    adapter);
+#ifdef module_param_array
+		} else {
+			adapter->mdd = opt.def;
+		}
+#endif
+	}
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
new file mode 100644
index 00000000..66236d29
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
@@ -0,0 +1,363 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
+#include "e1000_82575.h"
+#include "e1000_hw.h"
+
+#ifdef IGB_PROCFS
+#ifndef IGB_HWMON
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+
+static struct proc_dir_entry *igb_top_dir = NULL;
+
+
+bool igb_thermal_present(struct igb_adapter *adapter)
+{
+	s32 status;
+	struct e1000_hw *hw;
+
+	if (adapter == NULL)
+		return false;
+	hw = &adapter->hw;
+
+	/*
+	 * Only set I2C bit-bang mode if an external thermal sensor is
+	 * supported on this device.
+	 */
+	if (adapter->ets) {
+		status = e1000_set_i2c_bb(hw);
+		if (status != E1000_SUCCESS)
+			return false;
+	}
+
+	status = hw->mac.ops.init_thermal_sensor_thresh(hw);
+	if (status != E1000_SUCCESS)
+		return false;
+
+	return true;
+}
+
+
+static int igb_macburn(char *page, char **start, off_t off, int count,
+			int *eof, void *data)
+{
+	struct e1000_hw *hw;
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	if (adapter == NULL)
+		return snprintf(page, count, "error: no adapter\n");
+
+	hw = &adapter->hw;
+	if (hw == NULL)
+		return snprintf(page, count, "error: no hw data\n");
+
+	return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
+		       (unsigned int)hw->mac.perm_addr[0],
+		       (unsigned int)hw->mac.perm_addr[1],
+		       (unsigned int)hw->mac.perm_addr[2],
+		       (unsigned int)hw->mac.perm_addr[3],
+		       (unsigned int)hw->mac.perm_addr[4],
+		       (unsigned int)hw->mac.perm_addr[5]);
+}
+
+static int igb_macadmn(char *page, char **start, off_t off,
+		       int count, int *eof, void *data)
+{
+	struct e1000_hw *hw;
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	if (adapter == NULL)
+		return snprintf(page, count, "error: no adapter\n");
+
+	hw = &adapter->hw;
+	if (hw == NULL)
+		return snprintf(page, count, "error: no hw data\n");
+
+	return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
+		       (unsigned int)hw->mac.addr[0],
+		       (unsigned int)hw->mac.addr[1],
+		       (unsigned int)hw->mac.addr[2],
+		       (unsigned int)hw->mac.addr[3],
+		       (unsigned int)hw->mac.addr[4],
+		       (unsigned int)hw->mac.addr[5]);
+}
+
+static int igb_numeports(char *page, char **start, off_t off, int count,
+			 int *eof, void *data)
+{
+	struct e1000_hw *hw;
+	int ports;
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	if (adapter == NULL)
+		return snprintf(page, count, "error: no adapter\n");
+
+	hw = &adapter->hw;
+	if (hw == NULL)
+		return snprintf(page, count, "error: no hw data\n");
+
+	ports = 4;
+
+	return snprintf(page, count, "%d\n", ports);
+}
+
+static int igb_porttype(char *page, char **start, off_t off, int count,
+			int *eof, void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	if (adapter == NULL)
+		return snprintf(page, count, "error: no adapter\n");
+
+	return snprintf(page, count, "%d\n",
+			test_bit(__IGB_DOWN, &adapter->state));
+}
+
+static int igb_therm_location(char *page, char **start, off_t off,
+				     int count, int *eof, void *data)
+{
+	struct igb_therm_proc_data *therm_data =
+		(struct igb_therm_proc_data *)data;
+
+	if (therm_data == NULL)
+		return snprintf(page, count, "error: no therm_data\n");
+
+	return snprintf(page, count, "%d\n", therm_data->sensor_data->location);
+}
+
+static int igb_therm_maxopthresh(char *page, char **start, off_t off,
+				    int count, int *eof, void *data)
+{
+	struct igb_therm_proc_data *therm_data =
+		(struct igb_therm_proc_data *)data;
+
+	if (therm_data == NULL)
+		return snprintf(page, count, "error: no therm_data\n");
+
+	return snprintf(page, count, "%d\n",
+			therm_data->sensor_data->max_op_thresh);
+}
+
+static int igb_therm_cautionthresh(char *page, char **start, off_t off,
+				      int count, int *eof, void *data)
+{
+	struct igb_therm_proc_data *therm_data =
+		(struct igb_therm_proc_data *)data;
+
+	if (therm_data == NULL)
+		return snprintf(page, count, "error: no therm_data\n");
+
+	return snprintf(page, count, "%d\n",
+			therm_data->sensor_data->caution_thresh);
+}
+
+static int igb_therm_temp(char *page, char **start, off_t off,
+			     int count, int *eof, void *data)
+{
+	s32 status;
+	struct igb_therm_proc_data *therm_data =
+		(struct igb_therm_proc_data *)data;
+
+	if (therm_data == NULL)
+		return snprintf(page, count, "error: no therm_data\n");
+
+	status = e1000_get_thermal_sensor_data(therm_data->hw);
+	if (status != E1000_SUCCESS)
+		snprintf(page, count, "error: status %d returned\n", status);
+
+	return snprintf(page, count, "%d\n", therm_data->sensor_data->temp);
+}
+
+struct igb_proc_type{
+	char name[32];
+	int (*read)(char*, char**, off_t, int, int*, void*);
+};
+
+struct igb_proc_type igb_proc_entries[] = {
+	{"numeports", &igb_numeports},
+	{"porttype", &igb_porttype},
+	{"macburn", &igb_macburn},
+	{"macadmn", &igb_macadmn},
+	{"", NULL}
+};
+
+struct igb_proc_type igb_internal_entries[] = {
+	{"location", &igb_therm_location},
+	{"temp", &igb_therm_temp},
+	{"cautionthresh", &igb_therm_cautionthresh},
+	{"maxopthresh", &igb_therm_maxopthresh},
+	{"", NULL}
+};
+
+void igb_del_proc_entries(struct igb_adapter *adapter)
+{
+	int index, i;
+	char buf[16];	/* much larger than the sensor number will ever be */
+
+	if (igb_top_dir == NULL)
+		return;
+
+	for (i = 0; i < E1000_MAX_SENSORS; i++) {
+		if (adapter->therm_dir[i] == NULL)
+			continue;
+
+		for (index = 0; ; index++) {
+			if (igb_internal_entries[index].read == NULL)
+				break;
+
+			 remove_proc_entry(igb_internal_entries[index].name,
+					   adapter->therm_dir[i]);
+		}
+		snprintf(buf, sizeof(buf), "sensor_%d", i);
+		remove_proc_entry(buf, adapter->info_dir);
+	}
+
+	if (adapter->info_dir != NULL) {
+		for (index = 0; ; index++) {
+			if (igb_proc_entries[index].read == NULL)
+				break;
+		        remove_proc_entry(igb_proc_entries[index].name,
+					  adapter->info_dir);
+		}
+		remove_proc_entry("info", adapter->eth_dir);
+	}
+
+	if (adapter->eth_dir != NULL)
+		remove_proc_entry(pci_name(adapter->pdev), igb_top_dir);
+}
+
+/* called from igb_main.c */
+void igb_procfs_exit(struct igb_adapter *adapter)
+{
+	igb_del_proc_entries(adapter);
+}
+
+int igb_procfs_topdir_init(void)
+{
+	igb_top_dir = proc_mkdir("driver/igb", NULL);
+	if (igb_top_dir == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void igb_procfs_topdir_exit(void)
+{
+	remove_proc_entry("driver/igb", NULL);
+}
+
+/* called from igb_main.c */
+int igb_procfs_init(struct igb_adapter *adapter)
+{
+	int rc = 0;
+	int i;
+	int index;
+	char buf[16];	/* much larger than the sensor number will ever be */
+
+	adapter->eth_dir = NULL;
+	adapter->info_dir = NULL;
+	for (i = 0; i < E1000_MAX_SENSORS; i++)
+		adapter->therm_dir[i] = NULL;
+
+	if ( igb_top_dir == NULL ) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir);
+	if (adapter->eth_dir == NULL) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	adapter->info_dir = proc_mkdir("info", adapter->eth_dir);
+	if (adapter->info_dir == NULL) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	for (index = 0; ; index++) {
+		if (igb_proc_entries[index].read == NULL) {
+			break;
+		}
+		if (!(create_proc_read_entry(igb_proc_entries[index].name,
+					   0444,
+					   adapter->info_dir,
+					   igb_proc_entries[index].read,
+					   adapter))) {
+
+			rc = -ENOMEM;
+			goto fail;
+		}
+	}
+	if (igb_thermal_present(adapter) == false)
+		goto exit;
+
+	for (i = 0; i < E1000_MAX_SENSORS; i++) {
+
+		 if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0)
+			continue;
+
+		snprintf(buf, sizeof(buf), "sensor_%d", i);
+		adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir);
+		if (adapter->therm_dir[i] == NULL) {
+			rc = -ENOMEM;
+			goto fail;
+		}
+		for (index = 0; ; index++) {
+			if (igb_internal_entries[index].read == NULL)
+				break;
+			/*
+			 * therm_data struct contains pointer the read func
+			 * will be needing
+			 */
+			adapter->therm_data[i].hw = &adapter->hw;
+			adapter->therm_data[i].sensor_data =
+				&adapter->hw.mac.thermal_sensor_data.sensor[i];
+
+			if (!(create_proc_read_entry(
+					   igb_internal_entries[index].name,
+					   0444,
+					   adapter->therm_dir[i],
+					   igb_internal_entries[index].read,
+					   &adapter->therm_data[i]))) {
+				rc = -ENOMEM;
+				goto fail;
+			}
+		}
+	}
+	goto exit;
+
+fail:
+	igb_del_proc_entries(adapter);
+exit:
+	return rc;
+}
+
+#endif /* !IGB_HWMON */
+#endif /* IGB_PROCFS */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
new file mode 100644
index 00000000..454b70ce
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
@@ -0,0 +1,944 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/******************************************************************************
+ Copyright(c) 2011 Richard Cochran <richardcochran@gmail.com> for some of the
+ 82576 and 82580 code
+******************************************************************************/
+
+#include "igb.h"
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/ptp_classify.h>
+
+#define INCVALUE_MASK		0x7fffffff
+#define ISGN			0x80000000
+
+/*
+ * The 82580 timesync updates the system timer every 8ns by 8ns,
+ * and this update value cannot be reprogrammed.
+ *
+ * Neither the 82576 nor the 82580 offer registers wide enough to hold
+ * nanoseconds time values for very long. For the 82580, SYSTIM always
+ * counts nanoseconds, but the upper 24 bits are not available. The
+ * frequency is adjusted by changing the 32 bit fractional nanoseconds
+ * register, TIMINCA.
+ *
+ * For the 82576, the SYSTIM register time unit is affect by the
+ * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this
+ * field are needed to provide the nominal 16 nanosecond period,
+ * leaving 19 bits for fractional nanoseconds.
+ *
+ * We scale the NIC clock cycle by a large factor so that relatively
+ * small clock corrections can be added or subtracted at each clock
+ * tick. The drawbacks of a large factor are a) that the clock
+ * register overflows more quickly (not such a big deal) and b) that
+ * the increment per tick has to fit into 24 bits.  As a result we
+ * need to use a shift of 19 so we can fit a value of 16 into the
+ * TIMINCA register.
+ *
+ *
+ *             SYSTIMH            SYSTIML
+ *        +--------------+   +---+---+------+
+ *  82576 |      32      |   | 8 | 5 |  19  |
+ *        +--------------+   +---+---+------+
+ *         \________ 45 bits _______/  fract
+ *
+ *        +----------+---+   +--------------+
+ *  82580 |    24    | 8 |   |      32      |
+ *        +----------+---+   +--------------+
+ *          reserved  \______ 40 bits _____/
+ *
+ *
+ * The 45 bit 82576 SYSTIM overflows every
+ *   2^45 * 10^-9 / 3600 = 9.77 hours.
+ *
+ * The 40 bit 82580 SYSTIM overflows every
+ *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ */
+
+#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 9)
+#define IGB_PTP_TX_TIMEOUT		(HZ * 15)
+#define INCPERIOD_82576			(1 << E1000_TIMINCA_16NS_SHIFT)
+#define INCVALUE_82576_MASK		((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
+#define INCVALUE_82576			(16 << IGB_82576_TSYNC_SHIFT)
+#define IGB_NBITS_82580			40
+
+/*
+ * SYSTIM read access for the 82576
+ */
+
+static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
+{
+	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
+	struct e1000_hw *hw = &igb->hw;
+	u64 val;
+	u32 lo, hi;
+
+	lo = E1000_READ_REG(hw, E1000_SYSTIML);
+	hi = E1000_READ_REG(hw, E1000_SYSTIMH);
+
+	val = ((u64) hi) << 32;
+	val |= lo;
+
+	return val;
+}
+
+/*
+ * SYSTIM read access for the 82580
+ */
+
+static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
+{
+	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
+	struct e1000_hw *hw = &igb->hw;
+	u64 val;
+	u32 lo, hi;
+
+	/* The timestamp latches on lowest register read. For the 82580
+	 * the lowest register is SYSTIMR instead of SYSTIML.  However we only
+	 * need to provide nanosecond resolution, so we just ignore it.
+	 */
+	E1000_READ_REG(hw, E1000_SYSTIMR);
+	lo = E1000_READ_REG(hw, E1000_SYSTIML);
+	hi = E1000_READ_REG(hw, E1000_SYSTIMH);
+
+	val = ((u64) hi) << 32;
+	val |= lo;
+
+	return val;
+}
+
+/*
+ * SYSTIM read access for I210/I211
+ */
+
+static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 sec, nsec;
+
+	/* The timestamp latches on lowest register read. For I210/I211, the
+	 * lowest register is SYSTIMR. Since we only need to provide nanosecond
+	 * resolution, we can ignore it.
+	 */
+	E1000_READ_REG(hw, E1000_SYSTIMR);
+	nsec = E1000_READ_REG(hw, E1000_SYSTIML);
+	sec = E1000_READ_REG(hw, E1000_SYSTIMH);
+
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static void igb_ptp_write_i210(struct igb_adapter *adapter,
+			       const struct timespec *ts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/*
+	 * Writing the SYSTIMR register is not necessary as it only provides
+	 * sub-nanosecond resolution.
+	 */
+	E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec);
+	E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec);
+}
+
+/**
+ * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
+ * @adapter: board private structure
+ * @hwtstamps: timestamp structure to update
+ * @systim: unsigned 64bit system time value.
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
+ *
+ * The 'tmreg_lock' spinlock is used to protect the consistency of the
+ * system time value. This is needed because reading the 64 bit time
+ * value involves reading two (or three) 32 bit registers. The first
+ * read latches the value. Ditto for writing.
+ *
+ * In addition, here have extended the system time with an overflow
+ * counter in software.
+ **/
+static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
+				       struct skb_shared_hwtstamps *hwtstamps,
+				       u64 systim)
+{
+	unsigned long flags;
+	u64 ns;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+		spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+		ns = timecounter_cyc2time(&adapter->tc, systim);
+
+		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+		memset(hwtstamps, 0, sizeof(*hwtstamps));
+		hwtstamps->hwtstamp = ns_to_ktime(ns);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		memset(hwtstamps, 0, sizeof(*hwtstamps));
+		/* Upper 32 bits contain s, lower 32 bits contain ns. */
+		hwtstamps->hwtstamp = ktime_set(systim >> 32,
+						systim & 0xFFFFFFFF);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * PTP clock operations
+ */
+
+static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	int neg_adj = 0;
+	u64 rate;
+	u32 incvalue;
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+	rate = ppb;
+	rate <<= 14;
+	rate = div_u64(rate, 1953125);
+
+	incvalue = 16 << IGB_82576_TSYNC_SHIFT;
+
+	if (neg_adj)
+		incvalue -= rate;
+	else
+		incvalue += rate;
+
+	E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK));
+
+	return 0;
+}
+
+static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	int neg_adj = 0;
+	u64 rate;
+	u32 inca;
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+	rate = ppb;
+	rate <<= 26;
+	rate = div_u64(rate, 1953125);
+
+	/* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x
+	 * as quickly. Account for this by dividing the adjustment by 2.5.
+	 */
+	if (hw->mac.type == e1000_i354) {
+		u32 status = E1000_READ_REG(hw, E1000_STATUS);
+
+		if ((status & E1000_STATUS_2P5_SKU) &&
+		    !(status & E1000_STATUS_2P5_SKU_OVER)) {
+			rate <<= 1;
+			rate = div_u64(rate, 5);
+		}
+	}
+
+	inca = rate & INCVALUE_MASK;
+	if (neg_adj)
+		inca |= ISGN;
+
+	E1000_WRITE_REG(hw, E1000_TIMINCA, inca);
+
+	return 0;
+}
+
+static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	s64 now;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	now = timecounter_read(&igb->tc);
+	now += delta;
+	timecounter_init(&igb->tc, &igb->cc, now);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	struct timespec now, then = ns_to_timespec(delta);
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_read_i210(igb, &now);
+	now = timespec_add(now, then);
+	igb_ptp_write_i210(igb, (const struct timespec *)&now);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
+				 struct timespec *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	u64 ns;
+	u32 remainder;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	ns = timecounter_read(&igb->tc);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
+	ts->tv_nsec = remainder;
+
+	return 0;
+}
+
+static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
+				struct timespec *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_read_i210(igb, ts);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
+				 const struct timespec *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	u64 ns;
+
+	ns = ts->tv_sec * 1000000000ULL;
+	ns += ts->tv_nsec;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	timecounter_init(&igb->tc, &igb->cc, ns);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
+				const struct timespec *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_write_i210(igb, ts);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * igb_ptp_tx_work
+ * @work: pointer to work struct
+ *
+ * This work function polls the TSYNCTXCTL valid bit to determine when a
+ * timestamp has been taken for the current stored skb.
+ */
+void igb_ptp_tx_work(struct work_struct *work)
+{
+	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
+						   ptp_tx_work);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tsynctxctl;
+
+	if (!adapter->ptp_tx_skb)
+		return;
+
+	if (time_is_before_jiffies(adapter->ptp_tx_start +
+				   IGB_PTP_TX_TIMEOUT)) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+		adapter->tx_hwtstamp_timeouts++;
+		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang");
+		return;
+	}
+
+	tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
+	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
+		igb_ptp_tx_hwtstamp(adapter);
+	else
+		/* reschedule to check later */
+		schedule_work(&adapter->ptp_tx_work);
+}
+
+static void igb_ptp_overflow_check(struct work_struct *work)
+{
+	struct igb_adapter *igb =
+		container_of(work, struct igb_adapter, ptp_overflow_work.work);
+	struct timespec ts;
+
+	igb->ptp_caps.gettime(&igb->ptp_caps, &ts);
+
+	pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec);
+
+	schedule_delayed_work(&igb->ptp_overflow_work,
+			      IGB_SYSTIM_OVERFLOW_PERIOD);
+}
+
+/**
+ * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched
+ * @adapter: private network adapter structure
+ *
+ * This watchdog task is scheduled to detect error case where hardware has
+ * dropped an Rx packet that was timestamped when the ring is full. The
+ * particular error is rare but leaves the device in a state unable to timestamp
+ * any future packets.
+ */
+void igb_ptp_rx_hang(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_ring *rx_ring;
+	u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
+	unsigned long rx_event;
+	int n;
+
+	if (hw->mac.type != e1000_82576)
+		return;
+
+	/* If we don't have a valid timestamp in the registers, just update the
+	 * timeout counter and exit
+	 */
+	if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) {
+		adapter->last_rx_ptp_check = jiffies;
+		return;
+	}
+
+	/* Determine the most recent watchdog or rx_timestamp event */
+	rx_event = adapter->last_rx_ptp_check;
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		if (time_after(rx_ring->last_rx_timestamp, rx_event))
+			rx_event = rx_ring->last_rx_timestamp;
+	}
+
+	/* Only need to read the high RXSTMP register to clear the lock */
+	if (time_is_before_jiffies(rx_event + 5 * HZ)) {
+		E1000_READ_REG(hw, E1000_RXSTMPH);
+		adapter->last_rx_ptp_check = jiffies;
+		adapter->rx_hwtstamp_cleared++;
+		dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang");
+	}
+}
+
+/**
+ * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: Board private structure.
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow only one such packet into the queue.
+ */
+void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct skb_shared_hwtstamps shhwtstamps;
+	u64 regval;
+
+	regval = E1000_READ_REG(hw, E1000_TXSTMPL);
+	regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
+
+	igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+	skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
+	dev_kfree_skb_any(adapter->ptp_tx_skb);
+	adapter->ptp_tx_skb = NULL;
+}
+
+/**
+ * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
+ * @q_vector: Pointer to interrupt specific structure
+ * @va: Pointer to address containing Rx buffer
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve a timestamp from the first buffer of an
+ * incoming frame.  The value is stored in little endian format starting on
+ * byte 8.
+ */
+void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
+			 unsigned char *va,
+			 struct sk_buff *skb)
+{
+	__le64 *regval = (__le64 *)va;
+
+	/*
+	 * The timestamp is recorded in little endian format.
+	 * DWORD: 0        1        2        3
+	 * Field: Reserved Reserved SYSTIML  SYSTIMH
+	 */
+	igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+				   le64_to_cpu(regval[1]));
+}
+
+/**
+ * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
+ * @q_vector: Pointer to interrupt specific structure
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve a timestamp from the internal registers
+ * of the adapter and store it in the skb.
+ */
+void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
+			 struct sk_buff *skb)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	u64 regval;
+
+	/*
+	 * If this bit is set, then the RX registers contain the time stamp. No
+	 * other packet will be time stamped until we read these registers, so
+	 * read the registers to make them available again. Because only one
+	 * packet can be time stamped at a time, we know that the register
+	 * values must belong to this one here and therefore we don't need to
+	 * compare any of the additional attributes stored for it.
+	 *
+	 * If nothing went wrong, then it should have a shared tx_flags that we
+	 * can turn into a skb_shared_hwtstamps.
+	 */
+	if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
+		return;
+
+	regval = E1000_READ_REG(hw, E1000_RXSTMPL);
+	regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
+
+	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+}
+
+/**
+ * igb_ptp_hwtstamp_ioctl - control hardware time stamping
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't case any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ **/
+int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
+			   struct ifreq *ifr, int cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct hwtstamp_config config;
+	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+	u32 tsync_rx_cfg = 0;
+	bool is_l4 = false;
+	bool is_l2 = false;
+	u32 regval;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (config.flags)
+		return -EINVAL;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0;
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_ALL:
+		/*
+		 * 82576 cannot timestamp all packets, which it needs to do to
+		 * support both V1 Sync and Delay_Req messages
+		 */
+		if (hw->mac.type != e1000_82576) {
+			tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+			config.rx_filter = HWTSTAMP_FILTER_ALL;
+			break;
+		}
+		/* fall through */
+	default:
+		config.rx_filter = HWTSTAMP_FILTER_NONE;
+		return -ERANGE;
+	}
+
+	if (hw->mac.type == e1000_82575) {
+		if (tsync_rx_ctl | tsync_tx_ctl)
+			return -EINVAL;
+		return 0;
+	}
+
+	/*
+	 * Per-packet timestamping only works if all packets are
+	 * timestamped, so enable timestamping in all packets as
+	 * long as one rx filter was configured.
+	 */
+	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
+		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		is_l2 = true;
+		is_l4 = true;
+
+		if ((hw->mac.type == e1000_i210) ||
+		    (hw->mac.type == e1000_i211)) {
+			regval = E1000_READ_REG(hw, E1000_RXPBS);
+			regval |= E1000_RXPBS_CFG_TS_EN;
+			E1000_WRITE_REG(hw, E1000_RXPBS, regval);
+		}
+	}
+
+	/* enable/disable TX */
+	regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
+	regval &= ~E1000_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
+
+	/* enable/disable RX */
+	regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
+	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
+
+	/* define which PTP packets are time stamped */
+	E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
+
+	/* define ethertype filter for timestamped packets */
+	if (is_l2)
+		E1000_WRITE_REG(hw, E1000_ETQF(3),
+		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
+		      E1000_ETQF_1588 | /* enable timestamping */
+		      ETH_P_1588));     /* 1588 eth protocol type */
+	else
+		E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
+
+	/* L4 Queue Filter[3]: filter by destination port and protocol */
+	if (is_l4) {
+		u32 ftqf = (IPPROTO_UDP /* UDP */
+			| E1000_FTQF_VF_BP /* VF not compared */
+			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
+			| E1000_FTQF_MASK); /* mask all inputs */
+		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
+
+		E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT));
+		E1000_WRITE_REG(hw, E1000_IMIREXT(3),
+		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
+		if (hw->mac.type == e1000_82576) {
+			/* enable source port check */
+			E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT));
+			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
+		}
+		E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
+	} else {
+		E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
+	}
+	E1000_WRITE_FLUSH(hw);
+
+	/* clear TX/RX time stamp registers, just to be sure */
+	regval = E1000_READ_REG(hw, E1000_TXSTMPL);
+	regval = E1000_READ_REG(hw, E1000_TXSTMPH);
+	regval = E1000_READ_REG(hw, E1000_RXSTMPL);
+	regval = E1000_READ_REG(hw, E1000_RXSTMPH);
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+void igb_ptp_init(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 999999881;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
+		adapter->ptp_caps.settime = igb_ptp_settime_82576;
+		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->cc.read = igb_ptp_read_82576;
+		adapter->cc.mask = CLOCKSOURCE_MASK(64);
+		adapter->cc.mult = 1;
+		adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
+		/* Dial the nominal frequency. */
+		E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
+						   INCVALUE_82576);
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
+		adapter->ptp_caps.settime = igb_ptp_settime_82576;
+		adapter->ptp_caps.enable = igb_ptp_enable;
+		adapter->cc.read = igb_ptp_read_82580;
+		adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
+		adapter->cc.mult = 1;
+		adapter->cc.shift = 0;
+		/* Enable the timer functions by clearing bit 31. */
+		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
+		adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
+		adapter->ptp_caps.settime = igb_ptp_settime_i210;
+		adapter->ptp_caps.enable = igb_ptp_enable;
+		/* Enable the timer functions by clearing bit 31. */
+		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
+		break;
+	default:
+		adapter->ptp_clock = NULL;
+		return;
+	}
+
+	E1000_WRITE_FLUSH(hw);
+
+	spin_lock_init(&adapter->tmreg_lock);
+	INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+	/* Initialize the clock and overflow work for devices that need it. */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+		struct timespec ts = ktime_to_timespec(ktime_get_real());
+
+		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
+	} else {
+		timecounter_init(&adapter->tc, &adapter->cc,
+				 ktime_to_ns(ktime_get_real()));
+
+		INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+				  igb_ptp_overflow_check);
+
+		schedule_delayed_work(&adapter->ptp_overflow_work,
+				      IGB_SYSTIM_OVERFLOW_PERIOD);
+	}
+
+	/* Initialize the time sync interrupts for devices that support it. */
+	if (hw->mac.type >= e1000_82580) {
+		E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
+		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
+	}
+
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
+	if (IS_ERR(adapter->ptp_clock)) {
+		adapter->ptp_clock = NULL;
+		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
+	} else {
+		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
+			 adapter->netdev->name);
+		adapter->flags |= IGB_FLAG_PTP;
+	}
+}
+
+/**
+ * igb_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igb_ptp_stop(struct igb_adapter *adapter)
+{
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		/* No delayed work to cancel. */
+		break;
+	default:
+		return;
+	}
+
+	cancel_work_sync(&adapter->ptp_tx_work);
+	if (adapter->ptp_tx_skb) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+	}
+
+	if (adapter->ptp_clock) {
+		ptp_clock_unregister(adapter->ptp_clock);
+		dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
+			 adapter->netdev->name);
+		adapter->flags &= ~IGB_FLAG_PTP;
+	}
+}
+
+/**
+ * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
+ * @adapter: Board private structure.
+ *
+ * This function handles the reset work required to re-enable the PTP device.
+ **/
+void igb_ptp_reset(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (!(adapter->flags & IGB_FLAG_PTP))
+		return;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		/* Dial the nominal frequency. */
+		E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
+						   INCVALUE_82576);
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/* Enable the timer functions and interrupts. */
+		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
+		E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
+		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
+		break;
+	default:
+		/* No work to do. */
+		return;
+	}
+
+	/* Re-initialize the timer. */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+		struct timespec ts = ktime_to_timespec(ktime_get_real());
+
+		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
+	} else {
+		timecounter_init(&adapter->tc, &adapter->cc,
+				 ktime_to_ns(ktime_get_real()));
+	}
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
new file mode 100644
index 00000000..18da64a3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
@@ -0,0 +1,249 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool register test data */
+struct igb_reg_test {
+	u16 reg;
+	u16 reg_offset;
+	u16 array_len;
+	u16 test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x100 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define WRITE_NO_TEST	3
+#define TABLE32_TEST	4
+#define TABLE64_TEST_LO	5
+#define TABLE64_TEST_HI	6
+
+/* i210 reg test */
+static struct igb_reg_test reg_test_i210[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* RDH is read-only for i210, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0x900FFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* i350 reg test */
+static struct igb_reg_test reg_test_i350[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	/* VET is readonly on i350 */
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_RDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* RDH is read-only for i350, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_TDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0xC3FFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 16, TABLE64_TEST_HI,
+						0xC3FFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* 82580 reg test */
+static struct igb_reg_test reg_test_82580[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_RDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	/* RDH is read-only for 82580, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_TDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_HI,
+						0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* 82576 reg test */
+static struct igb_reg_test reg_test_82576[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_RDBAL(4),  0x40,  12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(4),  0x40,  12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(4),  0x40,  12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	/* Enable all queues before testing. */
+	{ E1000_RXDCTL(0), 0x100, 4,  WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+	{ E1000_RXDCTL(4), 0x40,  12, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+	/* RDH is read-only for 82576, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RDT(4),	   0x40,  12, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RXDCTL(0), 0x100, 4,  WRITE_NO_TEST, 0, 0 },
+	{ E1000_RXDCTL(4), 0x40,  12, WRITE_NO_TEST, 0, 0 },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_TDBAL(4),  0x40,  12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(4),  0x40,  12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(4),  0x40,  12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL, 	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_HI,
+						0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* 82575 register test */
+static struct igb_reg_test reg_test_82575[] = {
+	{ E1000_FCAL,	0x100,	1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	0x100,	1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	0x100,	1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,	0x100,	1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),	0x100,	4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),	0x100,	4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),	0x100,	4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* Enable all four RX queues before testing. */
+	{ E1000_RXDCTL(0),	0x100,	4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE },
+	/* RDH is read-only for 82575, only test RDT. */
+	{ E1000_RDT(0),	0x100,	4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RXDCTL(0),	0x100,	4, WRITE_NO_TEST, 0, 0 },
+	{ E1000_FCRTH,	0x100,	1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	0x100,	1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	0x100,	1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),	0x100,	4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),	0x100,	4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),	0x100,	4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_RCTL,	0x100,	1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL, 	0x100,	1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB },
+	{ E1000_RCTL, 	0x100,	1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	0x100,	1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_TXCW,	0x100,	1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF },
+	{ E1000_RA,	0,	16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	0,	16, TABLE64_TEST_HI,
+						0x800FFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	0,	128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
new file mode 100644
index 00000000..015c8952
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
@@ -0,0 +1,436 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include <linux/tcp.h>
+
+#include "igb.h"
+#include "igb_vmdq.h"
+#include <linux/if_vlan.h>
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+int igb_vmdq_open(struct net_device *dev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	struct net_device *main_netdev = adapter->netdev;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	if (test_bit(__IGB_DOWN, &adapter->state)) {
+		DPRINTK(DRV, WARNING,
+			"Open %s before opening this device.\n",
+			main_netdev->name);
+		return -EAGAIN;
+	}
+	netif_carrier_off(dev);
+	vadapter->tx_ring->vmdq_netdev = dev;
+	vadapter->rx_ring->vmdq_netdev = dev;
+	if (is_valid_ether_addr(dev->dev_addr)) {
+		igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
+		igb_add_mac_filter(adapter, dev->dev_addr, hw_queue);
+	}
+	netif_carrier_on(dev);
+	return 0;
+}
+
+int igb_vmdq_close(struct net_device *dev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	netif_carrier_off(dev);
+	igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
+
+	vadapter->tx_ring->vmdq_netdev = NULL;
+	vadapter->rx_ring->vmdq_netdev = NULL;
+	return 0;
+}
+
+netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+
+	return igb_xmit_frame_ring(skb, vadapter->tx_ring);
+}
+
+struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+        struct igb_adapter *adapter = vadapter->real_adapter;
+        struct e1000_hw *hw = &adapter->hw;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	vadapter->net_stats.rx_packets +=
+			E1000_READ_REG(hw, E1000_PFVFGPRC(hw_queue));
+	E1000_WRITE_REG(hw, E1000_PFVFGPRC(hw_queue), 0);
+        vadapter->net_stats.tx_packets +=
+			E1000_READ_REG(hw, E1000_PFVFGPTC(hw_queue));
+        E1000_WRITE_REG(hw, E1000_PFVFGPTC(hw_queue), 0);
+        vadapter->net_stats.rx_bytes +=
+			E1000_READ_REG(hw, E1000_PFVFGORC(hw_queue));
+        E1000_WRITE_REG(hw, E1000_PFVFGORC(hw_queue), 0);
+        vadapter->net_stats.tx_bytes +=
+			E1000_READ_REG(hw, E1000_PFVFGOTC(hw_queue));
+        E1000_WRITE_REG(hw, E1000_PFVFGOTC(hw_queue), 0);
+        vadapter->net_stats.multicast +=
+			E1000_READ_REG(hw, E1000_PFVFMPRC(hw_queue));
+        E1000_WRITE_REG(hw, E1000_PFVFMPRC(hw_queue), 0);
+	/* only return the current stats */
+	return &vadapter->net_stats;
+}
+
+/**
+ * igb_write_vm_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ *                0 on no addresses written
+ *                X on writing X addresses to the RAR table
+ **/
+static int igb_write_vm_addr_list(struct net_device *netdev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+        struct igb_adapter *adapter = vadapter->real_adapter;
+	int count = 0;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	/* return ENOMEM indicating insufficient memory for addresses */
+	if (netdev_uc_count(netdev) > igb_available_rars(adapter))
+		return -ENOMEM;
+
+	if (!netdev_uc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+		struct netdev_hw_addr *ha;
+#else
+		struct dev_mc_list *ha;
+#endif
+		netdev_for_each_uc_addr(ha, netdev) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+			igb_del_mac_filter(adapter, ha->addr, hw_queue);
+			igb_add_mac_filter(adapter, ha->addr, hw_queue);
+#else
+			igb_del_mac_filter(adapter, ha->da_addr, hw_queue);
+			igb_add_mac_filter(adapter, ha->da_addr, hw_queue);
+#endif
+			count++;
+		}
+	}
+	return count;
+}
+
+
+#define E1000_VMOLR_UPE		0x20000000 /* Unicast promiscuous mode */
+void igb_vmdq_set_rx_mode(struct net_device *dev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+        struct igb_adapter *adapter = vadapter->real_adapter;
+        struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr, rctl;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	vmolr = E1000_READ_REG(hw, E1000_VMOLR(hw_queue));
+
+	/* clear the affected bits */
+	vmolr &= ~(E1000_VMOLR_UPE | E1000_VMOLR_MPME |
+		   E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE);
+
+	if (dev->flags & IFF_PROMISC) {
+		vmolr |= E1000_VMOLR_UPE;
+		rctl = E1000_READ_REG(hw, E1000_RCTL);
+		rctl |= E1000_RCTL_UPE;
+		E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+	} else {
+		rctl = E1000_READ_REG(hw, E1000_RCTL);
+		rctl &= ~E1000_RCTL_UPE;
+		E1000_WRITE_REG(hw, E1000_RCTL, rctl);
+		if (dev->flags & IFF_ALLMULTI) {
+			vmolr |= E1000_VMOLR_MPME;
+		} else {
+			/*
+			 * Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			if (igb_write_mc_addr_list(adapter->netdev) != 0)
+				vmolr |= E1000_VMOLR_ROMPE;
+		}
+#ifdef HAVE_SET_RX_MODE
+		/*
+		 * Write addresses to available RAR registers, if there is not
+		 * sufficient space to store all the addresses then enable
+		 * unicast promiscuous mode
+		 */
+		if (igb_write_vm_addr_list(dev) < 0)
+			vmolr |= E1000_VMOLR_UPE;
+#endif
+	}
+	E1000_WRITE_REG(hw, E1000_VMOLR(hw_queue), vmolr);
+
+	return;
+}
+
+int igb_vmdq_set_mac(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+        struct igb_adapter *adapter = vadapter->real_adapter;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	igb_del_mac_filter(adapter, dev->dev_addr, hw_queue);
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	return igb_add_mac_filter(adapter, dev->dev_addr, hw_queue);
+}
+
+int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+
+	if (adapter->netdev->mtu < new_mtu) {
+		DPRINTK(PROBE, INFO,
+			"Set MTU on %s to >= %d "
+			"before changing MTU on %s\n",
+			adapter->netdev->name, new_mtu, dev->name);
+		return -EINVAL;
+	}
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+void igb_vmdq_tx_timeout(struct net_device *dev)
+{
+	return;
+}
+
+void igb_vmdq_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	vadapter->vlgrp = grp;
+
+	igb_enable_vlan_tags(adapter);
+	E1000_WRITE_REG(hw, E1000_VMVIR(hw_queue), 0);
+
+	return;
+}
+void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+	struct net_device *v_netdev;
+#endif
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	/* attempt to add filter to vlvf array */
+	igb_vlvf_set(adapter, vid, TRUE, hw_queue);
+
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+
+	/* Copy feature flags from netdev to the vlan netdev for this vid.
+	 * This allows things like TSO to bubble down to our vlan device.
+	 */
+	v_netdev = vlan_group_get_device(vadapter->vlgrp, vid);
+	v_netdev->features |= adapter->netdev->features;
+	vlan_group_set_device(vadapter->vlgrp, vid, v_netdev);
+#endif
+
+	return;
+}
+void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(dev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	int hw_queue = vadapter->rx_ring->queue_index +
+		       adapter->vfs_allocated_count;
+
+	vlan_group_set_device(vadapter->vlgrp, vid, NULL);
+	/* remove vlan from VLVF table array */
+	igb_vlvf_set(adapter, vid, FALSE, hw_queue);
+
+
+	return;
+}
+
+static int igb_vmdq_get_settings(struct net_device *netdev,
+				   struct ethtool_cmd *ecmd)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 status;
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+
+		ecmd->supported = (SUPPORTED_10baseT_Half |
+				   SUPPORTED_10baseT_Full |
+				   SUPPORTED_100baseT_Half |
+				   SUPPORTED_100baseT_Full |
+				   SUPPORTED_1000baseT_Full|
+				   SUPPORTED_Autoneg |
+				   SUPPORTED_TP);
+		ecmd->advertising = ADVERTISED_TP;
+
+		if (hw->mac.autoneg == 1) {
+			ecmd->advertising |= ADVERTISED_Autoneg;
+			/* the e1000 autoneg seems to match ethtool nicely */
+			ecmd->advertising |= hw->phy.autoneg_advertised;
+		}
+
+		ecmd->port = PORT_TP;
+		ecmd->phy_address = hw->phy.addr;
+	} else {
+		ecmd->supported   = (SUPPORTED_1000baseT_Full |
+				     SUPPORTED_FIBRE |
+				     SUPPORTED_Autoneg);
+
+		ecmd->advertising = (ADVERTISED_1000baseT_Full |
+				     ADVERTISED_FIBRE |
+				     ADVERTISED_Autoneg);
+
+		ecmd->port = PORT_FIBRE;
+	}
+
+	ecmd->transceiver = XCVR_INTERNAL;
+
+	status = E1000_READ_REG(hw, E1000_STATUS);
+
+	if (status & E1000_STATUS_LU) {
+
+		if ((status & E1000_STATUS_SPEED_1000) ||
+		    hw->phy.media_type != e1000_media_type_copper)
+			ecmd->speed = SPEED_1000;
+		else if (status & E1000_STATUS_SPEED_100)
+			ecmd->speed = SPEED_100;
+		else
+			ecmd->speed = SPEED_10;
+
+		if ((status & E1000_STATUS_FD) ||
+		    hw->phy.media_type != e1000_media_type_copper)
+			ecmd->duplex = DUPLEX_FULL;
+		else
+			ecmd->duplex = DUPLEX_HALF;
+	} else {
+		ecmd->speed = -1;
+		ecmd->duplex = -1;
+	}
+
+	ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+	return 0;
+}
+
+
+static u32 igb_vmdq_get_msglevel(struct net_device *netdev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	return adapter->msg_enable;
+}
+
+static void igb_vmdq_get_drvinfo(struct net_device *netdev,
+				   struct ethtool_drvinfo *drvinfo)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+	struct net_device *main_netdev = adapter->netdev;
+
+	strncpy(drvinfo->driver, igb_driver_name, 32);
+	strncpy(drvinfo->version, igb_driver_version, 32);
+
+	strncpy(drvinfo->fw_version, "N/A", 4);
+	snprintf(drvinfo->bus_info, 32, "%s VMDQ %d", main_netdev->name,
+		 vadapter->rx_ring->queue_index);
+	drvinfo->n_stats = 0;
+	drvinfo->testinfo_len = 0;
+	drvinfo->regdump_len = 0;
+}
+
+static void igb_vmdq_get_ringparam(struct net_device *netdev,
+				     struct ethtool_ringparam *ring)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+
+	struct igb_ring *tx_ring = vadapter->tx_ring;
+	struct igb_ring *rx_ring = vadapter->rx_ring;
+
+	ring->rx_max_pending = IGB_MAX_RXD;
+	ring->tx_max_pending = IGB_MAX_TXD;
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_pending = rx_ring->count;
+	ring->tx_pending = tx_ring->count;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+static u32 igb_vmdq_get_rx_csum(struct net_device *netdev)
+{
+	struct igb_vmdq_adapter *vadapter = netdev_priv(netdev);
+	struct igb_adapter *adapter = vadapter->real_adapter;
+
+	return test_bit(IGB_RING_FLAG_RX_CSUM, &adapter->rx_ring[0]->flags);
+}
+
+
+static struct ethtool_ops igb_vmdq_ethtool_ops = {
+	.get_settings           = igb_vmdq_get_settings,
+	.get_drvinfo            = igb_vmdq_get_drvinfo,
+	.get_link               = ethtool_op_get_link,
+	.get_ringparam          = igb_vmdq_get_ringparam,
+	.get_rx_csum            = igb_vmdq_get_rx_csum,
+	.get_tx_csum            = ethtool_op_get_tx_csum,
+	.get_sg                 = ethtool_op_get_sg,
+	.set_sg                 = ethtool_op_set_sg,
+	.get_msglevel           = igb_vmdq_get_msglevel,
+#ifdef NETIF_F_TSO
+	.get_tso                = ethtool_op_get_tso,
+#endif
+#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
+	.get_perm_addr          = ethtool_op_get_perm_addr,
+#endif
+};
+
+void igb_vmdq_set_ethtool_ops(struct net_device *netdev)
+{
+	SET_ETHTOOL_OPS(netdev, &igb_vmdq_ethtool_ops);
+}
+
+
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
new file mode 100644
index 00000000..e51e7c4e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
@@ -0,0 +1,46 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IGB_VMDQ_H_
+#define _IGB_VMDQ_H_
+
+#ifdef CONFIG_IGB_VMDQ_NETDEV
+int igb_vmdq_open(struct net_device *dev);
+int igb_vmdq_close(struct net_device *dev);
+netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev);
+void igb_vmdq_set_rx_mode(struct net_device *dev);
+int igb_vmdq_set_mac(struct net_device *dev, void *addr);
+int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu);
+void igb_vmdq_tx_timeout(struct net_device *dev);
+void igb_vmdq_vlan_rx_register(struct net_device *dev,
+				 struct vlan_group *grp);
+void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
+void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
+void igb_vmdq_set_ethtool_ops(struct net_device *netdev);
+#endif /* CONFIG_IGB_VMDQ_NETDEV */
+#endif /* _IGB_VMDQ_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
new file mode 100644
index 00000000..bde3a83c
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
@@ -0,0 +1,1482 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "igb.h"
+#include "kcompat.h"
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+/* From lib/vsprintf.c */
+#include <asm/div64.h>
+
+static int skip_atoi(const char **s)
+{
+	int i=0;
+
+	while (isdigit(**s))
+		i = i*10 + *((*s)++) - '0';
+	return i;
+}
+
+#define _kc_ZEROPAD	1		/* pad with zero */
+#define _kc_SIGN	2		/* unsigned/signed long */
+#define _kc_PLUS	4		/* show plus */
+#define _kc_SPACE	8		/* space if plus */
+#define _kc_LEFT	16		/* left justified */
+#define _kc_SPECIAL	32		/* 0x */
+#define _kc_LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+{
+	char c,sign,tmp[66];
+	const char *digits;
+	const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+	const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	int i;
+
+	digits = (type & _kc_LARGE) ? large_digits : small_digits;
+	if (type & _kc_LEFT)
+		type &= ~_kc_ZEROPAD;
+	if (base < 2 || base > 36)
+		return 0;
+	c = (type & _kc_ZEROPAD) ? '0' : ' ';
+	sign = 0;
+	if (type & _kc_SIGN) {
+		if (num < 0) {
+			sign = '-';
+			num = -num;
+			size--;
+		} else if (type & _kc_PLUS) {
+			sign = '+';
+			size--;
+		} else if (type & _kc_SPACE) {
+			sign = ' ';
+			size--;
+		}
+	}
+	if (type & _kc_SPECIAL) {
+		if (base == 16)
+			size -= 2;
+		else if (base == 8)
+			size--;
+	}
+	i = 0;
+	if (num == 0)
+		tmp[i++]='0';
+	else while (num != 0)
+		tmp[i++] = digits[do_div(num,base)];
+	if (i > precision)
+		precision = i;
+	size -= precision;
+	if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
+		while(size-->0) {
+			if (buf <= end)
+				*buf = ' ';
+			++buf;
+		}
+	}
+	if (sign) {
+		if (buf <= end)
+			*buf = sign;
+		++buf;
+	}
+	if (type & _kc_SPECIAL) {
+		if (base==8) {
+			if (buf <= end)
+				*buf = '0';
+			++buf;
+		} else if (base==16) {
+			if (buf <= end)
+				*buf = '0';
+			++buf;
+			if (buf <= end)
+				*buf = digits[33];
+			++buf;
+		}
+	}
+	if (!(type & _kc_LEFT)) {
+		while (size-- > 0) {
+			if (buf <= end)
+				*buf = c;
+			++buf;
+		}
+	}
+	while (i < precision--) {
+		if (buf <= end)
+			*buf = '0';
+		++buf;
+	}
+	while (i-- > 0) {
+		if (buf <= end)
+			*buf = tmp[i];
+		++buf;
+	}
+	while (size-- > 0) {
+		if (buf <= end)
+			*buf = ' ';
+		++buf;
+	}
+	return buf;
+}
+
+int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+	int len;
+	unsigned long long num;
+	int i, base;
+	char *str, *end, c;
+	const char *s;
+
+	int flags;		/* flags to number() */
+
+	int field_width;	/* width of output field */
+	int precision;		/* min. # of digits for integers; max
+				   number of chars for from string */
+	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
+				/* 'z' support added 23/7/1999 S.H.    */
+				/* 'z' changed to 'Z' --davidm 1/25/99 */
+
+	str = buf;
+	end = buf + size - 1;
+
+	if (end < buf - 1) {
+		end = ((void *) -1);
+		size = end - buf + 1;
+	}
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			if (str <= end)
+				*str = *fmt;
+			++str;
+			continue;
+		}
+
+		/* process flags */
+		flags = 0;
+		repeat:
+			++fmt;		/* this also skips first '%' */
+			switch (*fmt) {
+				case '-': flags |= _kc_LEFT; goto repeat;
+				case '+': flags |= _kc_PLUS; goto repeat;
+				case ' ': flags |= _kc_SPACE; goto repeat;
+				case '#': flags |= _kc_SPECIAL; goto repeat;
+				case '0': flags |= _kc_ZEROPAD; goto repeat;
+			}
+
+		/* get field width */
+		field_width = -1;
+		if (isdigit(*fmt))
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = va_arg(args, int);
+			if (field_width < 0) {
+				field_width = -field_width;
+				flags |= _kc_LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;
+		if (*fmt == '.') {
+			++fmt;
+			if (isdigit(*fmt))
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = va_arg(args, int);
+			}
+			if (precision < 0)
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+			qualifier = *fmt;
+			++fmt;
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+			case 'c':
+				if (!(flags & _kc_LEFT)) {
+					while (--field_width > 0) {
+						if (str <= end)
+							*str = ' ';
+						++str;
+					}
+				}
+				c = (unsigned char) va_arg(args, int);
+				if (str <= end)
+					*str = c;
+				++str;
+				while (--field_width > 0) {
+					if (str <= end)
+						*str = ' ';
+					++str;
+				}
+				continue;
+
+			case 's':
+				s = va_arg(args, char *);
+				if (!s)
+					s = "<NULL>";
+
+				len = strnlen(s, precision);
+
+				if (!(flags & _kc_LEFT)) {
+					while (len < field_width--) {
+						if (str <= end)
+							*str = ' ';
+						++str;
+					}
+				}
+				for (i = 0; i < len; ++i) {
+					if (str <= end)
+						*str = *s;
+					++str; ++s;
+				}
+				while (len < field_width--) {
+					if (str <= end)
+						*str = ' ';
+					++str;
+				}
+				continue;
+
+			case 'p':
+				if (field_width == -1) {
+					field_width = 2*sizeof(void *);
+					flags |= _kc_ZEROPAD;
+				}
+				str = number(str, end,
+						(unsigned long) va_arg(args, void *),
+						16, field_width, precision, flags);
+				continue;
+
+
+			case 'n':
+				/* FIXME:
+				* What does C99 say about the overflow case here? */
+				if (qualifier == 'l') {
+					long * ip = va_arg(args, long *);
+					*ip = (str - buf);
+				} else if (qualifier == 'Z') {
+					size_t * ip = va_arg(args, size_t *);
+					*ip = (str - buf);
+				} else {
+					int * ip = va_arg(args, int *);
+					*ip = (str - buf);
+				}
+				continue;
+
+			case '%':
+				if (str <= end)
+					*str = '%';
+				++str;
+				continue;
+
+				/* integer number formats - set up the flags and "break" */
+			case 'o':
+				base = 8;
+				break;
+
+			case 'X':
+				flags |= _kc_LARGE;
+			case 'x':
+				base = 16;
+				break;
+
+			case 'd':
+			case 'i':
+				flags |= _kc_SIGN;
+			case 'u':
+				break;
+
+			default:
+				if (str <= end)
+					*str = '%';
+				++str;
+				if (*fmt) {
+					if (str <= end)
+						*str = *fmt;
+					++str;
+				} else {
+					--fmt;
+				}
+				continue;
+		}
+		if (qualifier == 'L')
+			num = va_arg(args, long long);
+		else if (qualifier == 'l') {
+			num = va_arg(args, unsigned long);
+			if (flags & _kc_SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'Z') {
+			num = va_arg(args, size_t);
+		} else if (qualifier == 'h') {
+			num = (unsigned short) va_arg(args, int);
+			if (flags & _kc_SIGN)
+				num = (signed short) num;
+		} else {
+			num = va_arg(args, unsigned int);
+			if (flags & _kc_SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, end, num, base,
+				field_width, precision, flags);
+	}
+	if (str <= end)
+		*str = '\0';
+	else if (size > 0)
+		/* don't write out a null byte if the buf size is zero */
+		*end = '\0';
+	/* the trailing null byte doesn't count towards the total
+	* ++str;
+	*/
+	return str-buf;
+}
+
+int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i = _kc_vsnprintf(buf,size,fmt,args);
+	va_end(args);
+	return i;
+}
+#endif /* < 2.4.8 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#if defined(CONFIG_HIGHMEM)
+
+#ifndef PCI_DRAM_OFFSET
+#define PCI_DRAM_OFFSET 0
+#endif
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+                 size_t size, int direction)
+{
+	return (((u64) (page - mem_map) << PAGE_SHIFT) + offset +
+		PCI_DRAM_OFFSET);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+                 size_t size, int direction)
+{
+	return pci_map_single(dev, (void *)page_address(page) + offset, size,
+			      direction);
+}
+
+#endif /* CONFIG_HIGHMEM */
+
+void
+_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
+                   int direction)
+{
+	return pci_unmap_single(dev, dma_addr, size, direction);
+}
+
+#endif /* 2.4.13 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+int
+_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
+{
+	if (!pci_dma_supported(dev, mask))
+		return -EIO;
+	dev->dma_mask = mask;
+	return 0;
+}
+
+int
+_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
+{
+	int i;
+
+	for (i = 0; i < 6; i++) {
+		if (pci_resource_len(dev, i) == 0)
+			continue;
+
+		if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
+			if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+				pci_release_regions(dev);
+				return -EBUSY;
+			}
+		} else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
+			if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+				pci_release_regions(dev);
+				return -EBUSY;
+			}
+		}
+	}
+	return 0;
+}
+
+void
+_kc_pci_release_regions(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < 6; i++) {
+		if (pci_resource_len(dev, i) == 0)
+			continue;
+
+		if (pci_resource_flags(dev, i) & IORESOURCE_IO)
+			release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+
+		else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
+			release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+	}
+}
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+struct net_device *
+_kc_alloc_etherdev(int sizeof_priv)
+{
+	struct net_device *dev;
+	int alloc_size;
+
+	alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
+	dev = kzalloc(alloc_size, GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	if (sizeof_priv)
+		dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
+	dev->name[0] = '\0';
+	ether_setup(dev);
+
+	return dev;
+}
+
+int
+_kc_is_valid_ether_addr(u8 *addr)
+{
+	const char zaddr[6] = { 0, };
+
+	return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
+}
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+int
+_kc_pci_set_power_state(struct pci_dev *dev, int state)
+{
+	return 0;
+}
+
+int
+_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
+{
+	return 0;
+}
+
+#endif /* 2.4.6 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
+                            int off, int size)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+	frag->page = page;
+	frag->page_offset = off;
+	frag->size = size;
+	skb_shinfo(skb)->nr_frags = i + 1;
+}
+
+/*
+ * Original Copyright:
+ * find_next_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+                            unsigned long offset)
+{
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG-1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= (~0UL << offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp &= (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + ffs(tmp);
+}
+
+size_t _kc_strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+
+#ifndef do_div
+#if BITS_PER_LONG == 32
+uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base)
+{
+	uint64_t rem = *n;
+	uint64_t b = base;
+	uint64_t res, d = 1;
+	uint32_t high = rem >> 32;
+
+	/* Reduce the thing a bit first */
+	res = 0;
+	if (high >= base) {
+		high /= base;
+		res = (uint64_t) high << 32;
+		rem -= (uint64_t) (high*base) << 32;
+	}
+
+	while ((int64_t)b > 0 && b < rem) {
+		b = b+b;
+		d = d+d;
+	}
+
+	do {
+		if (rem >= b) {
+			rem -= b;
+			res += d;
+		}
+		b >>= 1;
+		d >>= 1;
+	} while (d);
+
+	*n = res;
+	return rem;
+}
+#endif /* BITS_PER_LONG == 32 */
+#endif /* do_div */
+#endif /* 2.6.0 => 2.4.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i = vsnprintf(buf, size, fmt, args);
+	va_end(args);
+	return (i >= size) ? (size - 1) : i;
+}
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+char *_kc_kstrdup(const char *s, unsigned int gfp)
+{
+	size_t len;
+	char *buf;
+
+	if (!s)
+		return NULL;
+
+	len = strlen(s) + 1;
+	buf = kmalloc(len, gfp);
+	if (buf)
+		memcpy(buf, s, len);
+	return buf;
+}
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+void *_kc_kzalloc(size_t size, int flags)
+{
+	void *ret = kmalloc(size, flags);
+	if (ret)
+		memset(ret, 0, size);
+	return ret;
+}
+#endif /* <= 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+int _kc_skb_pad(struct sk_buff *skb, int pad)
+{
+	int ntail;
+
+        /* If the skbuff is non linear tailroom is always zero.. */
+        if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
+		memset(skb->data+skb->len, 0, pad);
+		return 0;
+        }
+
+	ntail = skb->data_len + pad - (skb->end - skb->tail);
+	if (likely(skb_cloned(skb) || ntail > 0)) {
+		if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC));
+			goto free_skb;
+	}
+
+#ifdef MAX_SKB_FRAGS
+	if (skb_is_nonlinear(skb) &&
+	    !__pskb_pull_tail(skb, skb->data_len))
+		goto free_skb;
+
+#endif
+	memset(skb->data + skb->len, 0, pad);
+        return 0;
+
+free_skb:
+	kfree_skb(skb);
+	return -ENOMEM;
+}
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+int _kc_pci_save_state(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int size = PCI_CONFIG_SPACE_LEN, i;
+	u16 pcie_cap_offset, pcie_link_status;
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+	/* no ->dev for 2.4 kernels */
+	WARN_ON(pdev->dev.driver_data == NULL);
+#endif
+	pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (pcie_cap_offset) {
+		if (!pci_read_config_word(pdev,
+		                          pcie_cap_offset + PCIE_LINK_STATUS,
+		                          &pcie_link_status))
+		size = PCIE_CONFIG_SPACE_LEN;
+	}
+	pci_config_space_ich8lan();
+#ifdef HAVE_PCI_ERS
+	if (adapter->config_space == NULL)
+#else
+	WARN_ON(adapter->config_space != NULL);
+#endif
+		adapter->config_space = kmalloc(size, GFP_KERNEL);
+	if (!adapter->config_space) {
+		printk(KERN_ERR "Out of memory in pci_save_state\n");
+		return -ENOMEM;
+	}
+	for (i = 0; i < (size / 4); i++)
+		pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
+	return 0;
+}
+
+void _kc_pci_restore_state(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int size = PCI_CONFIG_SPACE_LEN, i;
+	u16 pcie_cap_offset;
+	u16 pcie_link_status;
+
+	if (adapter->config_space != NULL) {
+		pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+		if (pcie_cap_offset &&
+		    !pci_read_config_word(pdev,
+		                          pcie_cap_offset + PCIE_LINK_STATUS,
+		                          &pcie_link_status))
+			size = PCIE_CONFIG_SPACE_LEN;
+
+		pci_config_space_ich8lan();
+		for (i = 0; i < (size / 4); i++)
+		pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
+#ifndef HAVE_PCI_ERS
+		kfree(adapter->config_space);
+		adapter->config_space = NULL;
+#endif
+	}
+}
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+void _kc_free_netdev(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+
+	if (adapter->config_space != NULL)
+		kfree(adapter->config_space);
+#ifdef CONFIG_SYSFS
+	if (netdev->reg_state == NETREG_UNINITIALIZED) {
+		kfree((char *)netdev - netdev->padded);
+	} else {
+		BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
+		netdev->reg_state = NETREG_RELEASED;
+		class_device_put(&netdev->class_dev);
+	}
+#else
+	kfree((char *)netdev - netdev->padded);
+#endif
+}
+#endif
+
+void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
+{
+	void *p;
+
+	p = kzalloc(len, gfp);
+	if (p)
+		memcpy(p, src, len);
+	return p;
+}
+#endif /* <= 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev)
+{
+	return ((struct adapter_struct *)netdev_priv(netdev))->pdev;
+}
+#endif /* < 2.6.21 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+/* hexdump code taken from lib/hexdump.c */
+static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
+			int groupsize, unsigned char *linebuf,
+			size_t linebuflen, bool ascii)
+{
+	const u8 *ptr = buf;
+	u8 ch;
+	int j, lx = 0;
+	int ascii_column;
+
+	if (rowsize != 16 && rowsize != 32)
+		rowsize = 16;
+
+	if (!len)
+		goto nil;
+	if (len > rowsize)		/* limit to one line at a time */
+		len = rowsize;
+	if ((len % groupsize) != 0)	/* no mixed size output */
+		groupsize = 1;
+
+	switch (groupsize) {
+	case 8: {
+		const u64 *ptr8 = buf;
+		int ngroups = len / groupsize;
+
+		for (j = 0; j < ngroups; j++)
+			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+				"%s%16.16llx", j ? " " : "",
+				(unsigned long long)*(ptr8 + j));
+		ascii_column = 17 * ngroups + 2;
+		break;
+	}
+
+	case 4: {
+		const u32 *ptr4 = buf;
+		int ngroups = len / groupsize;
+
+		for (j = 0; j < ngroups; j++)
+			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+				"%s%8.8x", j ? " " : "", *(ptr4 + j));
+		ascii_column = 9 * ngroups + 2;
+		break;
+	}
+
+	case 2: {
+		const u16 *ptr2 = buf;
+		int ngroups = len / groupsize;
+
+		for (j = 0; j < ngroups; j++)
+			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+				"%s%4.4x", j ? " " : "", *(ptr2 + j));
+		ascii_column = 5 * ngroups + 2;
+		break;
+	}
+
+	default:
+		for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
+			ch = ptr[j];
+			linebuf[lx++] = hex_asc(ch >> 4);
+			linebuf[lx++] = hex_asc(ch & 0x0f);
+			linebuf[lx++] = ' ';
+		}
+		if (j)
+			lx--;
+
+		ascii_column = 3 * rowsize + 2;
+		break;
+	}
+	if (!ascii)
+		goto nil;
+
+	while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
+		linebuf[lx++] = ' ';
+	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
+		linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
+				: '.';
+nil:
+	linebuf[lx++] = '\0';
+}
+
+void _kc_print_hex_dump(const char *level,
+			const char *prefix_str, int prefix_type,
+			int rowsize, int groupsize,
+			const void *buf, size_t len, bool ascii)
+{
+	const u8 *ptr = buf;
+	int i, linelen, remaining = len;
+	unsigned char linebuf[200];
+
+	if (rowsize != 16 && rowsize != 32)
+		rowsize = 16;
+
+	for (i = 0; i < len; i += rowsize) {
+		linelen = min(remaining, rowsize);
+		remaining -= rowsize;
+		_kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+				linebuf, sizeof(linebuf), ascii);
+
+		switch (prefix_type) {
+		case DUMP_PREFIX_ADDRESS:
+			printk("%s%s%*p: %s\n", level, prefix_str,
+				(int)(2 * sizeof(void *)), ptr + i, linebuf);
+			break;
+		case DUMP_PREFIX_OFFSET:
+			printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
+			break;
+		default:
+			printk("%s%s%s\n", level, prefix_str, linebuf);
+			break;
+		}
+	}
+}
+
+#ifdef HAVE_I2C_SUPPORT
+struct i2c_client *
+_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
+{
+	struct i2c_client	*client;
+	int			status;
+
+	client = kzalloc(sizeof *client, GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	client->adapter = adap;
+
+	client->dev.platform_data = info->platform_data;
+
+	client->flags = info->flags;
+	client->addr = info->addr;
+
+	strlcpy(client->name, info->type, sizeof(client->name));
+
+	/* Check for address business */
+	status = i2c_check_addr(adap, client->addr);
+	if (status)
+		goto out_err;
+
+	client->dev.parent = &client->adapter->dev;
+	client->dev.bus = &i2c_bus_type;
+
+	status = i2c_attach_client(client);
+	if (status)
+		goto out_err;
+
+	dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
+		client->name, dev_name(&client->dev));
+
+	return client;
+
+out_err:
+	dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x "
+		"(%d)\n", client->name, client->addr, status);
+	kfree(client);
+	return NULL;
+}
+#endif /* HAVE_I2C_SUPPORT */
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifdef NAPI
+struct net_device *napi_to_poll_dev(const struct napi_struct *napi)
+{
+	struct adapter_q_vector *q_vector = container_of(napi,
+	                                                struct adapter_q_vector,
+	                                                napi);
+	return &q_vector->poll_dev;
+}
+
+int __kc_adapter_clean(struct net_device *netdev, int *budget)
+{
+	int work_done;
+	int work_to_do = min(*budget, netdev->quota);
+	/* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
+	struct napi_struct *napi = netdev->priv;
+	work_done = napi->poll(napi, work_to_do);
+	*budget -= work_done;
+	netdev->quota -= work_done;
+	return (work_done >= work_to_do) ? 1 : 0;
+}
+#endif /* NAPI */
+#endif /* <= 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+	struct pci_dev *parent = pdev->bus->self;
+	u16 link_state;
+	int pos;
+
+	if (!parent)
+		return;
+
+	pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+	if (pos) {
+		pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
+		link_state &= ~state;
+		pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
+	}
+}
+#endif /* < 2.6.26 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+#ifdef HAVE_TX_MQ
+void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int i;
+
+	netif_stop_queue(netdev);
+	if (netif_is_multiqueue(netdev))
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			netif_stop_subqueue(netdev, i);
+}
+void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int i;
+
+	netif_wake_queue(netdev);
+	if (netif_is_multiqueue(netdev))
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			netif_wake_subqueue(netdev, i);
+}
+void _kc_netif_tx_start_all_queues(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int i;
+
+	netif_start_queue(netdev);
+	if (netif_is_multiqueue(netdev))
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			netif_start_subqueue(netdev, i);
+}
+#endif /* HAVE_TX_MQ */
+
+#ifndef __WARN_printf
+void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+	va_list args;
+
+	printk(KERN_WARNING "------------[ cut here ]------------\n");
+	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+
+	dump_stack();
+}
+#endif /* __WARN_printf */
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+
+int
+_kc_pci_prepare_to_sleep(struct pci_dev *dev)
+{
+	pci_power_t target_state;
+	int error;
+
+	target_state = pci_choose_state(dev, PMSG_SUSPEND);
+
+	pci_enable_wake(dev, target_state, true);
+
+	error = pci_set_power_state(dev, target_state);
+
+	if (error)
+		pci_enable_wake(dev, target_state, false);
+
+	return error;
+}
+
+int
+_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+	int err;
+
+	err = pci_enable_wake(dev, PCI_D3cold, enable);
+	if (err)
+		goto out;
+
+	err = pci_enable_wake(dev, PCI_D3hot, enable);
+
+out:
+	return err;
+}
+#endif /* < 2.6.28 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
+static void __kc_pci_set_master(struct pci_dev *pdev, bool enable)
+{
+	u16 old_cmd, cmd;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &old_cmd);
+	if (enable)
+		cmd = old_cmd | PCI_COMMAND_MASTER;
+	else
+		cmd = old_cmd & ~PCI_COMMAND_MASTER;
+	if (cmd != old_cmd) {
+		dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n",
+			enable ? "enabling" : "disabling");
+		pci_write_config_word(pdev, PCI_COMMAND, cmd);
+	}
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) )
+	pdev->is_busmaster = enable;
+#endif
+}
+
+void _kc_pci_clear_master(struct pci_dev *dev)
+{
+	__kc_pci_set_master(dev, false);
+}
+#endif /* < 2.6.29 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
+#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+int _kc_pci_num_vf(struct pci_dev *dev)
+{
+	int num_vf = 0;
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *vfdev;
+
+	/* loop through all ethernet devices starting at PF dev */
+	vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL);
+	while (vfdev) {
+		if (vfdev->is_virtfn && vfdev->physfn == dev)
+			num_vf++;
+
+		vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev);
+	}
+
+#endif
+	return num_vf;
+}
+#endif /* RHEL_RELEASE_CODE */
+#endif /* < 2.6.34 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+#ifdef HAVE_TX_MQ
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+	struct Qdisc *qdisc;
+	int i;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if ( txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		for (i = txq; i < dev->num_tx_queues; i++) {
+			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+			if (qdisc) {
+				spin_lock_bh(qdisc_lock(qdisc));
+				qdisc_reset(qdisc);
+				spin_unlock_bh(qdisc_lock(qdisc));
+			}
+		}
+	}
+}
+#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
+#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
+#endif /* HAVE_TX_MQ */
+
+ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+				   const void __user *from, size_t count)
+{
+        loff_t pos = *ppos;
+        size_t res;
+
+        if (pos < 0)
+                return -EINVAL;
+        if (pos >= available || !count)
+                return 0;
+        if (count > available - pos)
+                count = available - pos;
+        res = copy_from_user(to + pos, from, count);
+        if (res == count)
+                return -EFAULT;
+        count -= res;
+        *ppos = pos + count;
+        return count;
+}
+
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+static const u32 _kc_flags_dup_features =
+	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+
+u32 _kc_ethtool_op_get_flags(struct net_device *dev)
+{
+	return dev->features & _kc_flags_dup_features;
+}
+
+int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
+{
+	if (data & ~supported)
+		return -EINVAL;
+
+	dev->features = ((dev->features & ~_kc_flags_dup_features) |
+			 (data & _kc_flags_dup_features));
+	return 0;
+}
+#endif /* < 2.6.36 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
+
+
+
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
+#endif /* < 2.6.39 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
+			 int off, int size, unsigned int truesize)
+{
+	skb_fill_page_desc(skb, i, page, off, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += truesize;
+}
+
+int _kc_simple_open(struct inode *inode, struct file *file)
+{
+        if (inode->i_private)
+                file->private_data = inode->i_private;
+
+        return 0;
+}
+
+#endif /* < 3.4.0 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
+#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
+    !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
+static inline int __kc_pcie_cap_version(struct pci_dev *dev)
+{
+	int pos;
+	u16 reg16;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return 0;
+	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16);
+	return reg16 & PCI_EXP_FLAGS_VERS;
+}
+
+static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev)
+{
+	return true;
+}
+
+static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+
+	return __kc_pcie_cap_version(dev) > 1 ||
+	       type == PCI_EXP_TYPE_ROOT_PORT ||
+	       type == PCI_EXP_TYPE_ENDPOINT ||
+	       type == PCI_EXP_TYPE_LEG_END;
+}
+
+static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+	int pos;
+	u16 pcie_flags_reg;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return 0;
+	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg);
+
+	return __kc_pcie_cap_version(dev) > 1 ||
+	       type == PCI_EXP_TYPE_ROOT_PORT ||
+	       (type == PCI_EXP_TYPE_DOWNSTREAM &&
+		pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
+}
+
+static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev)
+{
+	int type = pci_pcie_type(dev);
+
+	return __kc_pcie_cap_version(dev) > 1 ||
+	       type == PCI_EXP_TYPE_ROOT_PORT ||
+	       type == PCI_EXP_TYPE_RC_EC;
+}
+
+static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
+{
+	if (!pci_is_pcie(dev))
+		return false;
+
+	switch (pos) {
+	case PCI_EXP_FLAGS_TYPE:
+		return true;
+	case PCI_EXP_DEVCAP:
+	case PCI_EXP_DEVCTL:
+	case PCI_EXP_DEVSTA:
+		return __kc_pcie_cap_has_devctl(dev);
+	case PCI_EXP_LNKCAP:
+	case PCI_EXP_LNKCTL:
+	case PCI_EXP_LNKSTA:
+		return __kc_pcie_cap_has_lnkctl(dev);
+	case PCI_EXP_SLTCAP:
+	case PCI_EXP_SLTCTL:
+	case PCI_EXP_SLTSTA:
+		return __kc_pcie_cap_has_sltctl(dev);
+	case PCI_EXP_RTCTL:
+	case PCI_EXP_RTCAP:
+	case PCI_EXP_RTSTA:
+		return __kc_pcie_cap_has_rtctl(dev);
+	case PCI_EXP_DEVCAP2:
+	case PCI_EXP_DEVCTL2:
+	case PCI_EXP_LNKCAP2:
+	case PCI_EXP_LNKCTL2:
+	case PCI_EXP_LNKSTA2:
+		return __kc_pcie_cap_version(dev) > 1;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Note that these accessor functions are only for the "PCI Express
+ * Capability" (see PCIe spec r3.0, sec 7.8).  They do not apply to the
+ * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
+ */
+int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
+{
+	int ret;
+
+	*val = 0;
+	if (pos & 1)
+		return -EINVAL;
+
+	if (__kc_pcie_capability_reg_implemented(dev, pos)) {
+		ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
+		/*
+		 * Reset *val to 0 if pci_read_config_word() fails, it may
+		 * have been written as 0xFFFF if hardware error happens
+		 * during pci_read_config_word().
+		 */
+		if (ret)
+			*val = 0;
+		return ret;
+	}
+
+	/*
+	 * For Functions that do not implement the Slot Capabilities,
+	 * Slot Status, and Slot Control registers, these spaces must
+	 * be hardwired to 0b, with the exception of the Presence Detect
+	 * State bit in the Slot Status register of Downstream Ports,
+	 * which must be hardwired to 1b.  (PCIe Base Spec 3.0, sec 7.8)
+	 */
+	if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA &&
+	    pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) {
+		*val = PCI_EXP_SLTSTA_PDS;
+	}
+
+	return 0;
+}
+
+int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
+{
+	if (pos & 1)
+		return -EINVAL;
+
+	if (!__kc_pcie_capability_reg_implemented(dev, pos))
+		return 0;
+
+	return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
+}
+
+int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
+					    u16 clear, u16 set)
+{
+	int ret;
+	u16 val;
+
+	ret = __kc_pcie_capability_read_word(dev, pos, &val);
+	if (!ret) {
+		val &= ~clear;
+		val |= set;
+		ret = __kc_pcie_capability_write_word(dev, pos, val);
+	}
+
+	return ret;
+}
+#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
+          !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */
+#endif /* < 3.7.0 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
+#endif /* 3.9.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+	unsigned int vfs_assigned = 0;
+#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
+	int pos;
+	struct pci_dev *vfdev;
+	unsigned short dev_id;
+
+	/* only search if we are a PF */
+	if (!dev->is_physfn)
+		return 0;
+
+	/* find SR-IOV capability */
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+	if (!pos)
+		return 0;
+
+	/*
+	 * determine the device ID for the VFs, the vendor ID will be the
+	 * same as the PF so there is no need to check for that one
+	 */
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
+
+	/* loop through all the VFs to see if we own any that are assigned */
+	vfdev = pci_get_device(dev->vendor, dev_id, NULL);
+	while (vfdev) {
+		/*
+		 * It is considered assigned if it is a virtual function with
+		 * our dev as the physical function and the assigned bit is set
+		 */
+		if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
+		    (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
+			vfs_assigned++;
+
+		vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
+	}
+
+#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
+	return vfs_assigned;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* 3.10.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
new file mode 100644
index 00000000..e2cf71e0
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -0,0 +1,3918 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _KCOMPAT_H_
+#define _KCOMPAT_H_
+
+#ifndef LINUX_VERSION_CODE
+#include <linux/version.h>
+#else
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+#endif
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/mii.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+/* NAPI enable/disable flags here */
+#define NAPI
+
+#define adapter_struct igb_adapter
+#define adapter_q_vector igb_q_vector
+#define NAPI
+
+/* and finally set defines so that the code sees the changes */
+#ifdef NAPI
+#else
+#endif /* NAPI */
+
+/* packet split disable/enable */
+#ifdef DISABLE_PACKET_SPLIT
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+#define CONFIG_IGB_DISABLE_PACKET_SPLIT
+#endif
+#endif /* DISABLE_PACKET_SPLIT */
+
+/* MSI compatibility code for all kernels and drivers */
+#ifdef DISABLE_PCI_MSI
+#undef CONFIG_PCI_MSI
+#endif
+#ifndef CONFIG_PCI_MSI
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+struct msix_entry {
+	u16 vector; /* kernel uses to write allocated vector */
+	u16 entry;  /* driver uses to specify entry, OS writes */
+};
+#endif
+#undef pci_enable_msi
+#define pci_enable_msi(a) -ENOTSUPP
+#undef pci_disable_msi
+#define pci_disable_msi(a) do {} while (0)
+#undef pci_enable_msix
+#define pci_enable_msix(a, b, c) -ENOTSUPP
+#undef pci_disable_msix
+#define pci_disable_msix(a) do {} while (0)
+#define msi_remove_pci_irq_vectors(a) do {} while (0)
+#endif /* CONFIG_PCI_MSI */
+#ifdef DISABLE_PM
+#undef CONFIG_PM
+#endif
+
+#ifdef DISABLE_NET_POLL_CONTROLLER
+#undef CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef PMSG_SUSPEND
+#define PMSG_SUSPEND 3
+#endif
+
+/* generic boolean compatibility */
+#undef TRUE
+#undef FALSE
+#define TRUE true
+#define FALSE false
+#ifdef GCC_VERSION
+#if ( GCC_VERSION < 3000 )
+#define _Bool char
+#endif
+#else
+#define _Bool char
+#endif
+
+/* kernels less than 2.4.14 don't have this */
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef module_param
+#define module_param(v,t,p) MODULE_PARM(v, "i");
+#endif
+
+#ifndef DMA_64BIT_MASK
+#define DMA_64BIT_MASK  0xffffffffffffffffULL
+#endif
+
+#ifndef DMA_32BIT_MASK
+#define DMA_32BIT_MASK  0x00000000ffffffffULL
+#endif
+
+#ifndef PCI_CAP_ID_EXP
+#define PCI_CAP_ID_EXP 0x10
+#endif
+
+#ifndef PCIE_LINK_STATE_L0S
+#define PCIE_LINK_STATE_L0S 1
+#endif
+#ifndef PCIE_LINK_STATE_L1
+#define PCIE_LINK_STATE_L1 2
+#endif
+
+#ifndef mmiowb
+#ifdef CONFIG_IA64
+#define mmiowb() asm volatile ("mf.a" ::: "memory")
+#else
+#define mmiowb()
+#endif
+#endif
+
+#ifndef SET_NETDEV_DEV
+#define SET_NETDEV_DEV(net, pdev)
+#endif
+
+#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#define free_netdev(x)	kfree(x)
+#endif
+
+#ifdef HAVE_POLL_CONTROLLER
+#define CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef SKB_DATAREF_SHIFT
+/* if we do not have the infrastructure to detect if skb_header is cloned
+   just return false in all cases */
+#define skb_header_cloned(x) 0
+#endif
+
+#ifndef NETIF_F_GSO
+#define gso_size tso_size
+#define gso_segs tso_segs
+#endif
+
+#ifndef NETIF_F_GRO
+#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
+		vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
+#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
+#endif
+
+#ifndef NETIF_F_SCTP_CSUM
+#define NETIF_F_SCTP_CSUM 0
+#endif
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO (1 << 15)
+#endif
+
+#ifndef NETIF_F_NTUPLE
+#define NETIF_F_NTUPLE (1 << 27)
+#endif
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+
+#ifndef CHECKSUM_PARTIAL
+#define CHECKSUM_PARTIAL CHECKSUM_HW
+#define CHECKSUM_COMPLETE CHECKSUM_HW
+#endif
+
+#ifndef __read_mostly
+#define __read_mostly
+#endif
+
+#ifndef MII_RESV1
+#define MII_RESV1		0x17		/* Reserved...		*/
+#endif
+
+#ifndef unlikely
+#define unlikely(_x) _x
+#define likely(_x) _x
+#endif
+
+#ifndef WARN_ON
+#define WARN_ON(x)
+#endif
+
+#ifndef PCI_DEVICE
+#define PCI_DEVICE(vend,dev) \
+	.vendor = (vend), .device = (dev), \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
+#endif
+
+#ifndef node_online
+#define node_online(node) ((node) == 0)
+#endif
+
+#ifndef num_online_cpus
+#define num_online_cpus() smp_num_cpus
+#endif
+
+#ifndef cpu_online
+#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map)
+#endif
+
+#ifndef _LINUX_RANDOM_H
+#include <linux/random.h>
+#endif
+
+#ifndef DECLARE_BITMAP
+#ifndef BITS_TO_LONGS
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#endif
+#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)]
+#endif
+
+#ifndef VLAN_HLEN
+#define VLAN_HLEN 4
+#endif
+
+#ifndef VLAN_ETH_HLEN
+#define VLAN_ETH_HLEN 18
+#endif
+
+#ifndef VLAN_ETH_FRAME_LEN
+#define VLAN_ETH_FRAME_LEN 1518
+#endif
+
+#if !defined(IXGBE_DCA) && !defined(IGB_DCA)
+#define dca_get_tag(b) 0
+#define dca_add_requester(a) -1
+#define dca_remove_requester(b) do { } while(0)
+#define DCA_PROVIDER_ADD     0x0001
+#define DCA_PROVIDER_REMOVE  0x0002
+#endif
+
+#ifndef DCA_GET_TAG_TWO_ARGS
+#define dca3_get_tag(a,b) dca_get_tag(b)
+#endif
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#if defined(__i386__) || defined(__x86_64__)
+#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#endif
+#endif
+
+/* taken from 2.6.24 definition in linux/kernel.h */
+#ifndef IS_ALIGNED
+#define IS_ALIGNED(x,a)         (((x) % ((typeof(x))(a))) == 0)
+#endif
+
+#ifdef IS_ENABLED
+#undef IS_ENABLED
+#undef __ARG_PLACEHOLDER_1
+#undef config_enabled
+#undef _config_enabled
+#undef __config_enabled
+#undef ___config_enabled
+#endif
+
+#define __ARG_PLACEHOLDER_1 0,
+#define config_enabled(cfg) _config_enabled(cfg)
+#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value)
+#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0)
+#define ___config_enabled(__ignored, val, ...) val
+
+#define IS_ENABLED(option) \
+	(config_enabled(option) || config_enabled(option##_MODULE))
+
+#if !defined(NETIF_F_HW_VLAN_TX) && !defined(NETIF_F_HW_VLAN_CTAG_TX)
+struct _kc_vlan_ethhdr {
+	unsigned char	h_dest[ETH_ALEN];
+	unsigned char	h_source[ETH_ALEN];
+	__be16		h_vlan_proto;
+	__be16		h_vlan_TCI;
+	__be16		h_vlan_encapsulated_proto;
+};
+#define vlan_ethhdr _kc_vlan_ethhdr
+struct _kc_vlan_hdr {
+	__be16		h_vlan_TCI;
+	__be16		h_vlan_encapsulated_proto;
+};
+#define vlan_hdr _kc_vlan_hdr
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#define vlan_tx_tag_present(_skb) 0
+#define vlan_tx_tag_get(_skb) 0
+#endif
+#endif /* NETIF_F_HW_VLAN_TX && NETIF_F_HW_VLAN_CTAG_TX */
+
+#ifndef VLAN_PRIO_SHIFT
+#define VLAN_PRIO_SHIFT 13
+#endif
+
+
+#ifndef __GFP_COLD
+#define __GFP_COLD 0
+#endif
+
+#ifndef __GFP_COMP
+#define __GFP_COMP 0
+#endif
+
+/*****************************************************************************/
+/* Installations with ethtool version without eeprom, adapter id, or statistics
+ * support */
+
+#ifndef ETH_GSTRING_LEN
+#define ETH_GSTRING_LEN 32
+#endif
+
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS 0x1d
+#undef ethtool_drvinfo
+#define ethtool_drvinfo k_ethtool_drvinfo
+struct k_ethtool_drvinfo {
+	u32 cmd;
+	char driver[32];
+	char version[32];
+	char fw_version[32];
+	char bus_info[32];
+	char reserved1[32];
+	char reserved2[16];
+	u32 n_stats;
+	u32 testinfo_len;
+	u32 eedump_len;
+	u32 regdump_len;
+};
+
+struct ethtool_stats {
+	u32 cmd;
+	u32 n_stats;
+	u64 data[0];
+};
+#endif /* ETHTOOL_GSTATS */
+
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID 0x1c
+#endif /* ETHTOOL_PHYS_ID */
+
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS 0x1b
+enum ethtool_stringset {
+	ETH_SS_TEST             = 0,
+	ETH_SS_STATS,
+};
+struct ethtool_gstrings {
+	u32 cmd;            /* ETHTOOL_GSTRINGS */
+	u32 string_set;     /* string set id e.c. ETH_SS_TEST, etc*/
+	u32 len;            /* number of strings in the string set */
+	u8 data[0];
+};
+#endif /* ETHTOOL_GSTRINGS */
+
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST 0x1a
+enum ethtool_test_flags {
+	ETH_TEST_FL_OFFLINE	= (1 << 0),
+	ETH_TEST_FL_FAILED	= (1 << 1),
+};
+struct ethtool_test {
+	u32 cmd;
+	u32 flags;
+	u32 reserved;
+	u32 len;
+	u64 data[0];
+};
+#endif /* ETHTOOL_TEST */
+
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM 0xb
+#undef ETHTOOL_GREGS
+struct ethtool_eeprom {
+	u32 cmd;
+	u32 magic;
+	u32 offset;
+	u32 len;
+	u8 data[0];
+};
+
+struct ethtool_value {
+	u32 cmd;
+	u32 data;
+};
+#endif /* ETHTOOL_GEEPROM */
+
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK 0xa
+#endif /* ETHTOOL_GLINK */
+
+#ifndef ETHTOOL_GWOL
+#define ETHTOOL_GWOL 0x5
+#define ETHTOOL_SWOL 0x6
+#define SOPASS_MAX      6
+struct ethtool_wolinfo {
+	u32 cmd;
+	u32 supported;
+	u32 wolopts;
+	u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+#endif /* ETHTOOL_GWOL */
+
+#ifndef ETHTOOL_GREGS
+#define ETHTOOL_GREGS		0x00000004 /* Get NIC registers */
+#define ethtool_regs _kc_ethtool_regs
+/* for passing big chunks of data */
+struct _kc_ethtool_regs {
+	u32 cmd;
+	u32 version; /* driver-specific, indicates different chips/revs */
+	u32 len; /* bytes */
+	u8 data[0];
+};
+#endif /* ETHTOOL_GREGS */
+
+#ifndef ETHTOOL_GMSGLVL
+#define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
+#endif
+#ifndef ETHTOOL_SMSGLVL
+#define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level, priv. */
+#endif
+#ifndef ETHTOOL_NWAY_RST
+#define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation, priv */
+#endif
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK		0x0000000a /* Get link status */
+#endif
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
+#endif
+#ifndef ETHTOOL_SEEPROM
+#define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data */
+#endif
+#ifndef ETHTOOL_GCOALESCE
+#define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
+/* for configuring coalescing parameters of chip */
+#define ethtool_coalesce _kc_ethtool_coalesce
+struct _kc_ethtool_coalesce {
+	u32	cmd;	/* ETHTOOL_{G,S}COALESCE */
+
+	/* How many usecs to delay an RX interrupt after
+	 * a packet arrives.  If 0, only rx_max_coalesced_frames
+	 * is used.
+	 */
+	u32	rx_coalesce_usecs;
+
+	/* How many packets to delay an RX interrupt after
+	 * a packet arrives.  If 0, only rx_coalesce_usecs is
+	 * used.  It is illegal to set both usecs and max frames
+	 * to zero as this would cause RX interrupts to never be
+	 * generated.
+	 */
+	u32	rx_max_coalesced_frames;
+
+	/* Same as above two parameters, except that these values
+	 * apply while an IRQ is being serviced by the host.  Not
+	 * all cards support this feature and the values are ignored
+	 * in that case.
+	 */
+	u32	rx_coalesce_usecs_irq;
+	u32	rx_max_coalesced_frames_irq;
+
+	/* How many usecs to delay a TX interrupt after
+	 * a packet is sent.  If 0, only tx_max_coalesced_frames
+	 * is used.
+	 */
+	u32	tx_coalesce_usecs;
+
+	/* How many packets to delay a TX interrupt after
+	 * a packet is sent.  If 0, only tx_coalesce_usecs is
+	 * used.  It is illegal to set both usecs and max frames
+	 * to zero as this would cause TX interrupts to never be
+	 * generated.
+	 */
+	u32	tx_max_coalesced_frames;
+
+	/* Same as above two parameters, except that these values
+	 * apply while an IRQ is being serviced by the host.  Not
+	 * all cards support this feature and the values are ignored
+	 * in that case.
+	 */
+	u32	tx_coalesce_usecs_irq;
+	u32	tx_max_coalesced_frames_irq;
+
+	/* How many usecs to delay in-memory statistics
+	 * block updates.  Some drivers do not have an in-memory
+	 * statistic block, and in such cases this value is ignored.
+	 * This value must not be zero.
+	 */
+	u32	stats_block_coalesce_usecs;
+
+	/* Adaptive RX/TX coalescing is an algorithm implemented by
+	 * some drivers to improve latency under low packet rates and
+	 * improve throughput under high packet rates.  Some drivers
+	 * only implement one of RX or TX adaptive coalescing.  Anything
+	 * not implemented by the driver causes these values to be
+	 * silently ignored.
+	 */
+	u32	use_adaptive_rx_coalesce;
+	u32	use_adaptive_tx_coalesce;
+
+	/* When the packet rate (measured in packets per second)
+	 * is below pkt_rate_low, the {rx,tx}_*_low parameters are
+	 * used.
+	 */
+	u32	pkt_rate_low;
+	u32	rx_coalesce_usecs_low;
+	u32	rx_max_coalesced_frames_low;
+	u32	tx_coalesce_usecs_low;
+	u32	tx_max_coalesced_frames_low;
+
+	/* When the packet rate is below pkt_rate_high but above
+	 * pkt_rate_low (both measured in packets per second) the
+	 * normal {rx,tx}_* coalescing parameters are used.
+	 */
+
+	/* When the packet rate is (measured in packets per second)
+	 * is above pkt_rate_high, the {rx,tx}_*_high parameters are
+	 * used.
+	 */
+	u32	pkt_rate_high;
+	u32	rx_coalesce_usecs_high;
+	u32	rx_max_coalesced_frames_high;
+	u32	tx_coalesce_usecs_high;
+	u32	tx_max_coalesced_frames_high;
+
+	/* How often to do adaptive coalescing packet rate sampling,
+	 * measured in seconds.  Must not be zero.
+	 */
+	u32	rate_sample_interval;
+};
+#endif /* ETHTOOL_GCOALESCE */
+
+#ifndef ETHTOOL_SCOALESCE
+#define ETHTOOL_SCOALESCE	0x0000000f /* Set coalesce config. */
+#endif
+#ifndef ETHTOOL_GRINGPARAM
+#define ETHTOOL_GRINGPARAM	0x00000010 /* Get ring parameters */
+/* for configuring RX/TX ring parameters */
+#define ethtool_ringparam _kc_ethtool_ringparam
+struct _kc_ethtool_ringparam {
+	u32	cmd;	/* ETHTOOL_{G,S}RINGPARAM */
+
+	/* Read only attributes.  These indicate the maximum number
+	 * of pending RX/TX ring entries the driver will allow the
+	 * user to set.
+	 */
+	u32	rx_max_pending;
+	u32	rx_mini_max_pending;
+	u32	rx_jumbo_max_pending;
+	u32	tx_max_pending;
+
+	/* Values changeable by the user.  The valid values are
+	 * in the range 1 to the "*_max_pending" counterpart above.
+	 */
+	u32	rx_pending;
+	u32	rx_mini_pending;
+	u32	rx_jumbo_pending;
+	u32	tx_pending;
+};
+#endif /* ETHTOOL_GRINGPARAM */
+
+#ifndef ETHTOOL_SRINGPARAM
+#define ETHTOOL_SRINGPARAM	0x00000011 /* Set ring parameters, priv. */
+#endif
+#ifndef ETHTOOL_GPAUSEPARAM
+#define ETHTOOL_GPAUSEPARAM	0x00000012 /* Get pause parameters */
+/* for configuring link flow control parameters */
+#define ethtool_pauseparam _kc_ethtool_pauseparam
+struct _kc_ethtool_pauseparam {
+	u32	cmd;	/* ETHTOOL_{G,S}PAUSEPARAM */
+
+	/* If the link is being auto-negotiated (via ethtool_cmd.autoneg
+	 * being true) the user may set 'autoneg' here non-zero to have the
+	 * pause parameters be auto-negotiated too.  In such a case, the
+	 * {rx,tx}_pause values below determine what capabilities are
+	 * advertised.
+	 *
+	 * If 'autoneg' is zero or the link is not being auto-negotiated,
+	 * then {rx,tx}_pause force the driver to use/not-use pause
+	 * flow control.
+	 */
+	u32	autoneg;
+	u32	rx_pause;
+	u32	tx_pause;
+};
+#endif /* ETHTOOL_GPAUSEPARAM */
+
+#ifndef ETHTOOL_SPAUSEPARAM
+#define ETHTOOL_SPAUSEPARAM	0x00000013 /* Set pause parameters. */
+#endif
+#ifndef ETHTOOL_GRXCSUM
+#define ETHTOOL_GRXCSUM		0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SRXCSUM
+#define ETHTOOL_SRXCSUM		0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GTXCSUM
+#define ETHTOOL_GTXCSUM		0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STXCSUM
+#define ETHTOOL_STXCSUM		0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GSG
+#define ETHTOOL_GSG		0x00000018 /* Get scatter-gather enable
+					    * (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SSG
+#define ETHTOOL_SSG		0x00000019 /* Set scatter-gather enable
+					    * (ethtool_value). */
+#endif
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST		0x0000001a /* execute NIC self-test, priv. */
+#endif
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS	0x0000001b /* get specified string set */
+#endif
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID		0x0000001c /* identify the NIC */
+#endif
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS		0x0000001d /* get NIC-specific statistics */
+#endif
+#ifndef ETHTOOL_GTSO
+#define ETHTOOL_GTSO		0x0000001e /* Get TSO enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STSO
+#define ETHTOOL_STSO		0x0000001f /* Set TSO enable (ethtool_value) */
+#endif
+
+#ifndef ETHTOOL_BUSINFO_LEN
+#define ETHTOOL_BUSINFO_LEN	32
+#endif
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+#ifndef AX_RELEASE_VERSION
+#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+
+#ifndef AX_RELEASE_CODE
+#define AX_RELEASE_CODE 0
+#endif
+
+#if (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,0))
+#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,0)
+#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,1))
+#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,1)
+#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,2))
+#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,3)
+#endif
+
+#ifndef RHEL_RELEASE_CODE
+/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */
+#define RHEL_RELEASE_CODE 0
+#endif
+
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)))
+/* SLES11 SP3 is at least 3.0.61+ based */
+#define SLE_VERSION_CODE SLE_VERSION(11,3,0)
+#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
+#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
+#endif /* CONFIG_SUSE_KERNEL */
+#ifndef SLE_VERSION_CODE
+#define SLE_VERSION_CODE 0
+#endif /* SLE_VERSION_CODE */
+
+/* Ubuntu release and kernel codes must be specified from Makefile */
+#ifndef UBUNTU_RELEASE_VERSION
+#define UBUNTU_RELEASE_VERSION(a,b) (((a) * 100) + (b))
+#endif
+#ifndef UBUNTU_KERNEL_VERSION
+#define UBUNTU_KERNEL_VERSION(a,b,c,abi,upload) (((a) << 40) + ((b) << 32) + ((c) << 24) + ((abi) << 8) + (upload))
+#endif
+#ifndef UBUNTU_RELEASE_CODE
+#define UBUNTU_RELEASE_CODE 0
+#endif
+#ifndef UBUNTU_KERNEL_CODE
+#define UBUNTU_KERNEL_CODE 0
+#endif
+
+#ifdef __KLOCWORK__
+#ifdef ARRAY_SIZE
+#undef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+#endif /* __KLOCWORK__ */
+
+/*****************************************************************************/
+/* 2.4.3 => 2.4.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+#ifndef pci_set_dma_mask
+#define pci_set_dma_mask _kc_pci_set_dma_mask
+extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
+#endif
+
+#ifndef pci_request_regions
+#define pci_request_regions _kc_pci_request_regions
+extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name);
+#endif
+
+#ifndef pci_release_regions
+#define pci_release_regions _kc_pci_release_regions
+extern void _kc_pci_release_regions(struct pci_dev *pdev);
+#endif
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+#ifndef alloc_etherdev
+#define alloc_etherdev _kc_alloc_etherdev
+extern struct net_device * _kc_alloc_etherdev(int sizeof_priv);
+#endif
+
+#ifndef is_valid_ether_addr
+#define is_valid_ether_addr _kc_is_valid_ether_addr
+extern int _kc_is_valid_ether_addr(u8 *addr);
+#endif
+
+/**************************************/
+/* MISCELLANEOUS */
+
+#ifndef INIT_TQUEUE
+#define INIT_TQUEUE(_tq, _routine, _data)		\
+	do {						\
+		INIT_LIST_HEAD(&(_tq)->list);		\
+		(_tq)->sync = 0;			\
+		(_tq)->routine = _routine;		\
+		(_tq)->data = _data;			\
+	} while (0)
+#endif
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) )
+/* Generic MII registers. */
+#define MII_BMCR            0x00        /* Basic mode control register */
+#define MII_BMSR            0x01        /* Basic mode status register  */
+#define MII_PHYSID1         0x02        /* PHYS ID 1                   */
+#define MII_PHYSID2         0x03        /* PHYS ID 2                   */
+#define MII_ADVERTISE       0x04        /* Advertisement control reg   */
+#define MII_LPA             0x05        /* Link partner ability reg    */
+#define MII_EXPANSION       0x06        /* Expansion register          */
+/* Basic mode control register. */
+#define BMCR_FULLDPLX           0x0100  /* Full duplex                 */
+#define BMCR_ANENABLE           0x1000  /* Enable auto negotiation     */
+/* Basic mode status register. */
+#define BMSR_ERCAP              0x0001  /* Ext-reg capability          */
+#define BMSR_ANEGCAPABLE        0x0008  /* Able to do auto-negotiation */
+#define BMSR_10HALF             0x0800  /* Can do 10mbps, half-duplex  */
+#define BMSR_10FULL             0x1000  /* Can do 10mbps, full-duplex  */
+#define BMSR_100HALF            0x2000  /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL            0x4000  /* Can do 100mbps, full-duplex */
+/* Advertisement control register. */
+#define ADVERTISE_CSMA          0x0001  /* Only selector supported     */
+#define ADVERTISE_10HALF        0x0020  /* Try for 10mbps half-duplex  */
+#define ADVERTISE_10FULL        0x0040  /* Try for 10mbps full-duplex  */
+#define ADVERTISE_100HALF       0x0080  /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL       0x0100  /* Try for 100mbps full-duplex */
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+                       ADVERTISE_100HALF | ADVERTISE_100FULL)
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_ENABLENPAGE   0x0004  /* This enables npage words    */
+#endif
+
+/*****************************************************************************/
+/* 2.4.6 => 2.4.3 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+#ifndef pci_set_power_state
+#define pci_set_power_state _kc_pci_set_power_state
+extern int _kc_pci_set_power_state(struct pci_dev *dev, int state);
+#endif
+
+#ifndef pci_enable_wake
+#define pci_enable_wake _kc_pci_enable_wake
+extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable);
+#endif
+
+#ifndef pci_disable_device
+#define pci_disable_device _kc_pci_disable_device
+extern void _kc_pci_disable_device(struct pci_dev *pdev);
+#endif
+
+/* PCI PM entry point syntax changed, so don't support suspend/resume */
+#undef CONFIG_PM
+
+#endif /* 2.4.6 => 2.4.3 */
+
+#ifndef HAVE_PCI_SET_MWI
+#define pci_set_mwi(X) pci_write_config_word(X, \
+			       PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \
+			       PCI_COMMAND_INVALIDATE);
+#define pci_clear_mwi(X) pci_write_config_word(X, \
+			       PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \
+			       ~PCI_COMMAND_INVALIDATE);
+#endif
+
+/*****************************************************************************/
+/* 2.4.10 => 2.4.9 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) )
+
+/**************************************/
+/* MODULE API */
+
+#ifndef MODULE_LICENSE
+	#define MODULE_LICENSE(X)
+#endif
+
+/**************************************/
+/* OTHER */
+
+#undef min
+#define min(x,y) ({ \
+	const typeof(x) _x = (x);	\
+	const typeof(y) _y = (y);	\
+	(void) (&_x == &_y);		\
+	_x < _y ? _x : _y; })
+
+#undef max
+#define max(x,y) ({ \
+	const typeof(x) _x = (x);	\
+	const typeof(y) _y = (y);	\
+	(void) (&_x == &_y);		\
+	_x > _y ? _x : _y; })
+
+#define min_t(type,x,y) ({ \
+	type _x = (x); \
+	type _y = (y); \
+	_x < _y ? _x : _y; })
+
+#define max_t(type,x,y) ({ \
+	type _x = (x); \
+	type _y = (y); \
+	_x > _y ? _x : _y; })
+
+#ifndef list_for_each_safe
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...);
+#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args)
+extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args)
+#else /* 2.4.8 => 2.4.9 */
+extern int snprintf(char * buf, size_t size, const char *fmt, ...);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#endif
+#endif /* 2.4.10 -> 2.4.6 */
+
+
+/*****************************************************************************/
+/* 2.4.12 => 2.4.10 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) )
+#ifndef HAVE_NETIF_MSG
+#define HAVE_NETIF_MSG 1
+enum {
+	NETIF_MSG_DRV		= 0x0001,
+	NETIF_MSG_PROBE		= 0x0002,
+	NETIF_MSG_LINK		= 0x0004,
+	NETIF_MSG_TIMER		= 0x0008,
+	NETIF_MSG_IFDOWN	= 0x0010,
+	NETIF_MSG_IFUP		= 0x0020,
+	NETIF_MSG_RX_ERR	= 0x0040,
+	NETIF_MSG_TX_ERR	= 0x0080,
+	NETIF_MSG_TX_QUEUED	= 0x0100,
+	NETIF_MSG_INTR		= 0x0200,
+	NETIF_MSG_TX_DONE	= 0x0400,
+	NETIF_MSG_RX_STATUS	= 0x0800,
+	NETIF_MSG_PKTDATA	= 0x1000,
+	NETIF_MSG_HW		= 0x2000,
+	NETIF_MSG_WOL		= 0x4000,
+};
+
+#define netif_msg_drv(p)	((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p)	((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p)	((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p)	((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p)	((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p)	((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p)	((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p)	((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p)	((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p)	((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p)	((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p)	((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p)	((p)->msg_enable & NETIF_MSG_PKTDATA)
+#endif /* !HAVE_NETIF_MSG */
+#endif /* 2.4.12 => 2.4.10 */
+
+/*****************************************************************************/
+/* 2.4.13 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#ifndef virt_to_page
+	#define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT))
+#endif
+
+#ifndef pci_map_page
+#define pci_map_page _kc_pci_map_page
+extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction);
+#endif
+
+#ifndef pci_unmap_page
+#define pci_unmap_page _kc_pci_unmap_page
+extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction);
+#endif
+
+/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */
+
+#undef DMA_32BIT_MASK
+#define DMA_32BIT_MASK	0xffffffff
+#undef DMA_64BIT_MASK
+#define DMA_64BIT_MASK	0xffffffff
+
+/**************************************/
+/* OTHER */
+
+#ifndef cpu_relax
+#define cpu_relax()	rep_nop()
+#endif
+
+struct vlan_ethhdr {
+	unsigned char h_dest[ETH_ALEN];
+	unsigned char h_source[ETH_ALEN];
+	unsigned short h_vlan_proto;
+	unsigned short h_vlan_TCI;
+	unsigned short h_vlan_encapsulated_proto;
+};
+#endif /* 2.4.13 => 2.4.12 */
+
+/*****************************************************************************/
+/* 2.4.17 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) )
+
+#ifndef __devexit_p
+	#define __devexit_p(x) &(x)
+#endif
+
+#else
+        /* For Kernel 3.8 these are not defined - so undefine all */
+        #undef __devexit_p
+        #undef __devexit
+        #undef __devinit
+        #undef __devinitdata
+        #define __devexit_p(x) &(x)
+        #define __devexit
+        #define __devinit
+        #define __devinitdata
+
+#endif /* 2.4.17 => 2.4.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) )
+#define NETIF_MSG_HW	0x2000
+#define NETIF_MSG_WOL	0x4000
+
+#ifndef netif_msg_hw
+#define netif_msg_hw(p)		((p)->msg_enable & NETIF_MSG_HW)
+#endif
+#ifndef netif_msg_wol
+#define netif_msg_wol(p)	((p)->msg_enable & NETIF_MSG_WOL)
+#endif
+#endif /* 2.4.18 */
+
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* 2.4.20 => 2.4.19 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) )
+
+/* we won't support NAPI on less than 2.4.20 */
+#ifdef NAPI
+#undef NAPI
+#endif
+
+#endif /* 2.4.20 => 2.4.19 */
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#define pci_name(x)	((x)->slot_name)
+
+#ifndef SUPPORTED_10000baseT_Full
+#define SUPPORTED_10000baseT_Full	(1 << 12)
+#endif
+#ifndef ADVERTISED_10000baseT_Full
+#define ADVERTISED_10000baseT_Full	(1 << 12)
+#endif
+#endif
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#ifndef IGB_NO_LRO
+#define IGB_NO_LRO
+#endif
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* 2.4.23 => 2.4.22 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) )
+/*****************************************************************************/
+#ifdef NAPI
+#ifndef netif_poll_disable
+#define netif_poll_disable(x) _kc_netif_poll_disable(x)
+static inline void _kc_netif_poll_disable(struct net_device *netdev)
+{
+	while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) {
+		/* No hurry */
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(1);
+	}
+}
+#endif
+#ifndef netif_poll_enable
+#define netif_poll_enable(x) _kc_netif_poll_enable(x)
+static inline void _kc_netif_poll_enable(struct net_device *netdev)
+{
+	clear_bit(__LINK_STATE_RX_SCHED, &netdev->state);
+}
+#endif
+#endif /* NAPI */
+#ifndef netif_tx_disable
+#define netif_tx_disable(x) _kc_netif_tx_disable(x)
+static inline void _kc_netif_tx_disable(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	netif_stop_queue(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+#endif
+#else /* 2.4.23 => 2.4.22 */
+#define HAVE_SCTP
+#endif /* 2.4.23 => 2.4.22 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \
+    ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+      LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) )
+#define ETHTOOL_OPS_COMPAT
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) )
+#define __user
+#endif /* < 2.4.27 */
+
+/*****************************************************************************/
+/* 2.5.71 => 2.4.x */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) )
+#define sk_protocol protocol
+#define pci_get_device pci_find_device
+#endif /* 2.5.70 => 2.4.x */
+
+/*****************************************************************************/
+/* < 2.4.27 or 2.6.0 <= 2.6.5 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \
+    ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+      LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) )
+
+#ifndef netif_msg_init
+#define netif_msg_init _kc_netif_msg_init
+static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits)
+{
+	/* use default */
+	if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
+		return default_msg_enable_bits;
+	if (debug_value == 0) /* no output */
+		return 0;
+	/* set low N bits */
+	return (1 << debug_value) -1;
+}
+#endif
+
+#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */
+/*****************************************************************************/
+#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \
+     (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \
+      ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )))
+#define netdev_priv(x) x->priv
+#endif
+
+/*****************************************************************************/
+/* <= 2.5.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) )
+#include <linux/rtnetlink.h>
+#undef pci_register_driver
+#define pci_register_driver pci_module_init
+
+/*
+ * Most of the dma compat code is copied/modifed from the 2.4.37
+ * /include/linux/libata-compat.h header file
+ */
+/* These definitions mirror those in pci.h, so they can be used
+ * interchangeably with their PCI_ counterparts */
+enum dma_data_direction {
+	DMA_BIDIRECTIONAL = 0,
+	DMA_TO_DEVICE = 1,
+	DMA_FROM_DEVICE = 2,
+	DMA_NONE = 3,
+};
+
+struct device {
+	struct pci_dev pdev;
+};
+
+static inline struct pci_dev *to_pci_dev (struct device *dev)
+{
+	return (struct pci_dev *) dev;
+}
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+	return (struct device *) pdev;
+}
+
+#define pdev_printk(lvl, pdev, fmt, args...)	\
+	printk("%s %s: " fmt, lvl, pci_name(pdev), ## args)
+#define dev_err(dev, fmt, args...)            \
+	pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args)
+#define dev_info(dev, fmt, args...)            \
+	pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args)
+#define dev_warn(dev, fmt, args...)            \
+	pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args)
+#define dev_notice(dev, fmt, args...)            \
+	pdev_printk(KERN_NOTICE, to_pci_dev(dev), fmt, ## args)
+#define dev_dbg(dev, fmt, args...) \
+	pdev_printk(KERN_DEBUG, to_pci_dev(dev), fmt, ## args)
+
+/* NOTE: dangerous! we ignore the 'gfp' argument */
+#define dma_alloc_coherent(dev,sz,dma,gfp) \
+	pci_alloc_consistent(to_pci_dev(dev),(sz),(dma))
+#define dma_free_coherent(dev,sz,addr,dma_addr) \
+	pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr))
+
+#define dma_map_page(dev,a,b,c,d) \
+	pci_map_page(to_pci_dev(dev),(a),(b),(c),(d))
+#define dma_unmap_page(dev,a,b,c) \
+	pci_unmap_page(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_map_single(dev,a,b,c) \
+	pci_map_single(to_pci_dev(dev),(a),(b),(c))
+#define dma_unmap_single(dev,a,b,c) \
+	pci_unmap_single(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_map_sg(dev, sg, nents, dir) \
+	pci_map_sg(to_pci_dev(dev), (sg), (nents), (dir)
+#define dma_unmap_sg(dev, sg, nents, dir) \
+	pci_unmap_sg(to_pci_dev(dev), (sg), (nents), (dir)
+
+#define dma_sync_single(dev,a,b,c) \
+	pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c))
+
+/* for range just sync everything, that's all the pci API can do */
+#define dma_sync_single_range(dev,addr,off,sz,dir) \
+	pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir))
+
+#define dma_set_mask(dev,mask) \
+	pci_set_dma_mask(to_pci_dev(dev),(mask))
+
+/* hlist_* code - double linked lists */
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+	next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = NULL;
+	n->pprev = NULL;
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+#ifndef might_sleep
+#define might_sleep()
+#endif
+#else
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+	return &pdev->dev;
+}
+#endif /* <= 2.5.0 */
+
+/*****************************************************************************/
+/* 2.5.28 => 2.4.23 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
+
+#include <linux/tqueue.h>
+#define work_struct tq_struct
+#undef INIT_WORK
+#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a)
+#undef container_of
+#define container_of list_entry
+#define schedule_work schedule_task
+#define flush_scheduled_work flush_scheduled_tasks
+#define cancel_work_sync(x) flush_scheduled_work()
+
+#endif /* 2.5.28 => 2.4.17 */
+
+/*****************************************************************************/
+/* 2.6.0 => 2.5.28 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#ifndef read_barrier_depends
+#define read_barrier_depends() rmb()
+#endif
+
+#undef get_cpu
+#define get_cpu() smp_processor_id()
+#undef put_cpu
+#define put_cpu() do { } while(0)
+#define MODULE_INFO(version, _version)
+#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
+#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1
+#endif
+#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
+#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1
+#endif
+
+#define dma_set_coherent_mask(dev,mask) 1
+
+#undef dev_put
+#define dev_put(dev) __dev_put(dev)
+
+#ifndef skb_fill_page_desc
+#define skb_fill_page_desc _kc_skb_fill_page_desc
+extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size);
+#endif
+
+#undef ALIGN
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+
+#ifndef page_count
+#define page_count(p) atomic_read(&(p)->count)
+#endif
+
+#ifdef MAX_NUMNODES
+#undef MAX_NUMNODES
+#endif
+#define MAX_NUMNODES 1
+
+/* find_first_bit and find_next bit are not defined for most
+ * 2.4 kernels (except for the redhat 2.4.21 kernels
+ */
+#include <linux/bitops.h>
+#define BITOP_WORD(nr)          ((nr) / BITS_PER_LONG)
+#undef find_next_bit
+#define find_next_bit _kc_find_next_bit
+extern unsigned long _kc_find_next_bit(const unsigned long *addr,
+                                       unsigned long size,
+                                       unsigned long offset);
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+
+
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+	if (strchr(dev->name, '%'))
+		return "(unregistered net_device)";
+	return dev->name;
+}
+#define netdev_name(netdev)	_kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#ifndef strlcpy
+#define strlcpy _kc_strlcpy
+extern size_t _kc_strlcpy(char *dest, const char *src, size_t size);
+#endif /* strlcpy */
+
+#ifndef do_div
+#if BITS_PER_LONG == 64
+# define do_div(n,base) ({					\
+	uint32_t __base = (base);				\
+	uint32_t __rem;						\
+	__rem = ((uint64_t)(n)) % __base;			\
+	(n) = ((uint64_t)(n)) / __base;				\
+	__rem;							\
+ })
+#elif BITS_PER_LONG == 32
+extern uint32_t _kc__div64_32(uint64_t *dividend, uint32_t divisor);
+# define do_div(n,base) ({				\
+	uint32_t __base = (base);			\
+	uint32_t __rem;					\
+	if (likely(((n) >> 32) == 0)) {			\
+		__rem = (uint32_t)(n) % __base;		\
+		(n) = (uint32_t)(n) / __base;		\
+	} else 						\
+		__rem = _kc__div64_32(&(n), __base);	\
+	__rem;						\
+ })
+#else /* BITS_PER_LONG == ?? */
+# error do_div() does not yet support the C64
+#endif /* BITS_PER_LONG */
+#endif /* do_div */
+
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC	1000000000L
+#endif
+
+#undef HAVE_I2C_SUPPORT
+#else /* 2.6.0 */
+#if IS_ENABLED(CONFIG_I2C_ALGOBIT) && \
+	(RHEL_RELEASE_CODE && (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,9)))
+#define HAVE_I2C_SUPPORT
+#endif /* IS_ENABLED(CONFIG_I2C_ALGOBIT) */
+
+#endif /* 2.6.0 => 2.5.28 */
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )
+#define dma_pool pci_pool
+#define dma_pool_destroy pci_pool_destroy
+#define dma_pool_alloc pci_pool_alloc
+#define dma_pool_free pci_pool_free
+
+#define dma_pool_create(name,dev,size,align,allocation) \
+       pci_pool_create((name),to_pci_dev(dev),(size),(align),(allocation))
+#endif /* < 2.6.3 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+/* 2.6.5 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) )
+#define dma_sync_single_for_cpu		dma_sync_single
+#define dma_sync_single_for_device	dma_sync_single
+#define dma_sync_single_range_for_cpu		dma_sync_single_range
+#define dma_sync_single_range_for_device	dma_sync_single_range
+#ifndef pci_dma_mapping_error
+#define pci_dma_mapping_error _kc_pci_dma_mapping_error
+static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr)
+{
+	return dma_addr == 0;
+}
+#endif
+#endif /* 2.6.5 => 2.6.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...);
+#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args)
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) )
+/* taken from 2.6 include/linux/bitmap.h */
+#undef bitmap_zero
+#define bitmap_zero _kc_bitmap_zero
+static inline void _kc_bitmap_zero(unsigned long *dst, int nbits)
+{
+        if (nbits <= BITS_PER_LONG)
+                *dst = 0UL;
+        else {
+                int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+                memset(dst, 0, len);
+        }
+}
+#define random_ether_addr _kc_random_ether_addr
+static inline void _kc_random_ether_addr(u8 *addr)
+{
+        get_random_bytes(addr, ETH_ALEN);
+        addr[0] &= 0xfe; /* clear multicast */
+        addr[0] |= 0x02; /* set local assignment */
+}
+#define page_to_nid(x) 0
+
+#endif /* < 2.6.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) )
+#undef if_mii
+#define if_mii _kc_if_mii
+static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq)
+{
+	return (struct mii_ioctl_data *) &rq->ifr_ifru;
+}
+
+#ifndef __force
+#define __force
+#endif
+#endif /* < 2.6.7 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+#ifndef PCI_EXP_DEVCTL
+#define PCI_EXP_DEVCTL 8
+#endif
+#ifndef PCI_EXP_DEVCTL_CERE
+#define PCI_EXP_DEVCTL_CERE 0x0001
+#endif
+#define PCI_EXP_FLAGS		2	/* Capabilities register */
+#define PCI_EXP_FLAGS_VERS	0x000f	/* Capability version */
+#define PCI_EXP_FLAGS_TYPE	0x00f0	/* Device/Port type */
+#define  PCI_EXP_TYPE_ENDPOINT	0x0	/* Express Endpoint */
+#define  PCI_EXP_TYPE_LEG_END	0x1	/* Legacy Endpoint */
+#define  PCI_EXP_TYPE_ROOT_PORT 0x4	/* Root Port */
+#define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
+#define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
+#define PCI_EXP_DEVCAP		4	/* Device capabilities */
+#define PCI_EXP_DEVSTA		10	/* Device Status */
+#define msleep(x)	do { set_current_state(TASK_UNINTERRUPTIBLE); \
+				schedule_timeout((x * HZ)/1000 + 2); \
+			} while (0)
+
+#endif /* < 2.6.8 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
+#include <net/dsfield.h>
+#define __iomem
+
+#ifndef kcalloc
+#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags)
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+#define MSEC_PER_SEC    1000L
+static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+	return (MSEC_PER_SEC / HZ) * j;
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+	return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
+#else
+	return (j * MSEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m)
+{
+	if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET))
+		return MAX_JIFFY_OFFSET;
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+	return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+	return m * (HZ / MSEC_PER_SEC);
+#else
+	return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
+#endif
+}
+
+#define msleep_interruptible _kc_msleep_interruptible
+static inline unsigned long _kc_msleep_interruptible(unsigned int msecs)
+{
+	unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1;
+
+	while (timeout && !signal_pending(current)) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		timeout = schedule_timeout(timeout);
+	}
+	return _kc_jiffies_to_msecs(timeout);
+}
+
+/* Basic mode control register. */
+#define BMCR_SPEED1000		0x0040  /* MSB of Speed (1000)         */
+
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+#ifndef __be16
+#define __be16 u16
+#endif
+#ifndef __be32
+#define __be32 u32
+#endif
+#ifndef __be64
+#define __be64 u64
+#endif
+
+static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
+{
+	return (struct vlan_ethhdr *)skb->mac.raw;
+}
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY		(1 << 0)
+#define WAKE_UCAST		(1 << 1)
+#define WAKE_MCAST		(1 << 2)
+#define WAKE_BCAST		(1 << 3)
+#define WAKE_ARP		(1 << 4)
+#define WAKE_MAGIC		(1 << 5)
+#define WAKE_MAGICSECURE	(1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#define skb_header_pointer _kc_skb_header_pointer
+static inline void *_kc_skb_header_pointer(const struct sk_buff *skb,
+					    int offset, int len, void *buffer)
+{
+	int hlen = skb_headlen(skb);
+
+	if (hlen - offset >= len)
+		return skb->data + offset;
+
+#ifdef MAX_SKB_FRAGS
+	if (skb_copy_bits(skb, offset, buffer, len) < 0)
+		return NULL;
+
+	return buffer;
+#else
+	return NULL;
+#endif
+
+#ifndef NETDEV_TX_OK
+#define NETDEV_TX_OK 0
+#endif
+#ifndef NETDEV_TX_BUSY
+#define NETDEV_TX_BUSY 1
+#endif
+#ifndef NETDEV_TX_LOCKED
+#define NETDEV_TX_LOCKED -1
+#endif
+}
+
+#ifndef __bitwise
+#define __bitwise
+#endif
+#endif /* < 2.6.9 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+#ifdef module_param_array_named
+#undef module_param_array_named
+#define module_param_array_named(name, array, type, nump, perm)          \
+	static struct kparam_array __param_arr_##name                    \
+	= { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \
+	    sizeof(array[0]), array };                                   \
+	module_param_call(name, param_array_set, param_array_get,        \
+			  &__param_arr_##name, perm)
+#endif /* module_param_array_named */
+/*
+ * num_online is broken for all < 2.6.10 kernels.  This is needed to support
+ * Node module parameter of ixgbe.
+ */
+#undef num_online_nodes
+#define num_online_nodes(n) 1
+extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES);
+#undef node_online_map
+#define node_online_map _kcompat_node_online_map
+#define pci_get_class pci_find_class
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) )
+#define PCI_D0      0
+#define PCI_D1      1
+#define PCI_D2      2
+#define PCI_D3hot   3
+#define PCI_D3cold  4
+typedef int pci_power_t;
+#define pci_choose_state(pdev,state) state
+#define PMSG_SUSPEND 3
+#define PCI_EXP_LNKCTL	16
+
+#undef NETIF_F_LLTX
+
+#ifndef ARCH_HAS_PREFETCH
+#define prefetch(X)
+#endif
+
+#ifndef NET_IP_ALIGN
+#define NET_IP_ALIGN 2
+#endif
+
+#define KC_USEC_PER_SEC	1000000L
+#define usecs_to_jiffies _kc_usecs_to_jiffies
+static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j)
+{
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+	return (KC_USEC_PER_SEC / HZ) * j;
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+	return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC);
+#else
+	return (j * KC_USEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m)
+{
+	if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET))
+		return MAX_JIFFY_OFFSET;
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+	return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ);
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+	return m * (HZ / KC_USEC_PER_SEC);
+#else
+	return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC;
+#endif
+}
+
+#define PCI_EXP_LNKCAP		12	/* Link Capabilities */
+#define PCI_EXP_LNKSTA		18	/* Link Status */
+#define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
+#define PCI_EXP_SLTCTL		24	/* Slot Control */
+#define PCI_EXP_SLTSTA		26	/* Slot Status */
+#define PCI_EXP_RTCTL		28	/* Root Control */
+#define PCI_EXP_RTCAP		30	/* Root Capabilities */
+#define PCI_EXP_RTSTA		32	/* Root Status */
+#endif /* < 2.6.11 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) )
+#include <linux/reboot.h>
+#define USE_REBOOT_NOTIFIER
+
+/* Generic MII registers. */
+#define MII_CTRL1000        0x09        /* 1000BASE-T control          */
+#define MII_STAT1000        0x0a        /* 1000BASE-T status           */
+/* Advertisement control register. */
+#define ADVERTISE_PAUSE_CAP     0x0400  /* Try for pause               */
+#define ADVERTISE_PAUSE_ASYM    0x0800  /* Try for asymmetric pause     */
+/* Link partner ability register. */
+#define LPA_PAUSE_CAP		0x0400	/* Can pause                   */
+#define LPA_PAUSE_ASYM		0x0800	/* Can pause asymetrically     */
+/* 1000BASE-T Control register */
+#define ADVERTISE_1000FULL      0x0200  /* Advertise 1000BASE-T full duplex */
+#define ADVERTISE_1000HALF	0x0100  /* Advertise 1000BASE-T half duplex */
+/* 1000BASE-T Status register */
+#define LPA_1000LOCALRXOK	0x2000	/* Link partner local receiver status */
+#define LPA_1000REMRXOK		0x1000	/* Link partner remote receiver status */
+
+#ifndef is_zero_ether_addr
+#define is_zero_ether_addr _kc_is_zero_ether_addr
+static inline int _kc_is_zero_ether_addr(const u8 *addr)
+{
+	return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
+}
+#endif /* is_zero_ether_addr */
+#ifndef is_multicast_ether_addr
+#define is_multicast_ether_addr _kc_is_multicast_ether_addr
+static inline int _kc_is_multicast_ether_addr(const u8 *addr)
+{
+	return addr[0] & 0x01;
+}
+#endif /* is_multicast_ether_addr */
+#endif /* < 2.6.12 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+#ifndef kstrdup
+#define kstrdup _kc_kstrdup
+extern char *_kc_kstrdup(const char *s, unsigned int gfp);
+#endif
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+#define pm_message_t u32
+#ifndef kzalloc
+#define kzalloc _kc_kzalloc
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+
+/* Generic MII registers. */
+#define MII_ESTATUS	    0x0f	/* Extended Status */
+/* Basic mode status register. */
+#define BMSR_ESTATEN		0x0100	/* Extended Status in R15 */
+/* Extended status register. */
+#define ESTATUS_1000_TFULL	0x2000	/* Can do 1000BT Full */
+#define ESTATUS_1000_THALF	0x1000	/* Can do 1000BT Half */
+
+#define SUPPORTED_Pause	        (1 << 13)
+#define SUPPORTED_Asym_Pause	(1 << 14)
+#define ADVERTISED_Pause	(1 << 13)
+#define ADVERTISED_Asym_Pause	(1 << 14)
+
+#if (!(RHEL_RELEASE_CODE && \
+       (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \
+       (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))))
+#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t))
+#define gfp_t unsigned
+#else
+typedef unsigned gfp_t;
+#endif
+#endif /* !RHEL4.3->RHEL5.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) )
+#ifdef CONFIG_X86_64
+#define dma_sync_single_range_for_cpu(dev, addr, off, sz, dir)       \
+	dma_sync_single_for_cpu((dev), (addr), (off) + (sz), (dir))
+#define dma_sync_single_range_for_device(dev, addr, off, sz, dir)    \
+	dma_sync_single_for_device((dev), (addr), (off) + (sz), (dir))
+#endif
+#endif
+#endif /* < 2.6.14 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) )
+#ifndef vmalloc_node
+#define vmalloc_node(a,b) vmalloc(a)
+#endif /* vmalloc_node*/
+
+#define setup_timer(_timer, _function, _data) \
+do { \
+	(_timer)->function = _function; \
+	(_timer)->data = _data; \
+	init_timer(_timer); \
+} while (0)
+#ifndef device_can_wakeup
+#define device_can_wakeup(dev)	(1)
+#endif
+#ifndef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val)	do{}while(0)
+#endif
+#ifndef device_init_wakeup
+#define device_init_wakeup(dev,val) do {} while (0)
+#endif
+static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2)
+{
+	const u16 *a = (const u16 *) addr1;
+	const u16 *b = (const u16 *) addr2;
+
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+}
+#undef compare_ether_addr
+#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2)
+#endif /* < 2.6.15 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) )
+#undef DEFINE_MUTEX
+#define DEFINE_MUTEX(x)	DECLARE_MUTEX(x)
+#define mutex_lock(x)	down_interruptible(x)
+#define mutex_unlock(x)	up(x)
+
+#ifndef ____cacheline_internodealigned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp
+#else
+#define ____cacheline_internodealigned_in_smp
+#endif /* CONFIG_SMP */
+#endif /* ____cacheline_internodealigned_in_smp */
+#undef HAVE_PCI_ERS
+#else /* 2.6.16 and above */
+#undef HAVE_PCI_ERS
+#define HAVE_PCI_ERS
+#if ( SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(10,4,0) )
+#ifdef device_can_wakeup
+#undef device_can_wakeup
+#endif /* device_can_wakeup */
+#define device_can_wakeup(dev) 1
+#endif /* SLE_VERSION(10,4,0) */
+#endif /* < 2.6.16 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) )
+#ifndef dev_notice
+#define dev_notice(dev, fmt, args...)            \
+	dev_printk(KERN_NOTICE, dev, fmt, ## args)
+#endif
+
+#ifndef first_online_node
+#define first_online_node 0
+#endif
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+#endif /* < 2.6.17 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
+
+#ifndef IRQ_HANDLED
+#define irqreturn_t void
+#define IRQ_HANDLED
+#define IRQ_NONE
+#endif
+
+#ifndef IRQF_PROBE_SHARED
+#ifdef SA_PROBEIRQ
+#define IRQF_PROBE_SHARED SA_PROBEIRQ
+#else
+#define IRQF_PROBE_SHARED 0
+#endif
+#endif
+
+#ifndef IRQF_SHARED
+#define IRQF_SHARED SA_SHIRQ
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef FIELD_SIZEOF
+#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#endif
+
+#ifndef skb_is_gso
+#ifdef NETIF_F_TSO
+#define skb_is_gso _kc_skb_is_gso
+static inline int _kc_skb_is_gso(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_size;
+}
+#else
+#define skb_is_gso(a) 0
+#endif
+#endif
+
+#ifndef resource_size_t
+#define resource_size_t unsigned long
+#endif
+
+#ifdef skb_pad
+#undef skb_pad
+#endif
+#define skb_pad(x,y) _kc_skb_pad(x, y)
+int _kc_skb_pad(struct sk_buff *skb, int pad);
+#ifdef skb_padto
+#undef skb_padto
+#endif
+#define skb_padto(x,y) _kc_skb_padto(x, y)
+static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int size = skb->len;
+	if(likely(size >= len))
+		return 0;
+	return _kc_skb_pad(skb, len - size);
+}
+
+#ifndef DECLARE_PCI_UNMAP_ADDR
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
+	dma_addr_t ADDR_NAME
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
+	u32 LEN_NAME
+#define pci_unmap_addr(PTR, ADDR_NAME) \
+	((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+	(((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME) \
+	((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+	(((PTR)->LEN_NAME) = (VAL))
+#endif /* DECLARE_PCI_UNMAP_ADDR */
+#endif /* < 2.6.18 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,0)))
+#define i_private u.generic_ip
+#endif /* >= RHEL 5.0 */
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+#ifndef __ALIGN_MASK
+#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#endif
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) )
+#if (!((RHEL_RELEASE_CODE && \
+        ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \
+          RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \
+         (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0))))))
+typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *);
+#endif
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+#undef CONFIG_INET_LRO
+#undef CONFIG_INET_LRO_MODULE
+#ifdef IXGBE_FCOE
+#undef CONFIG_FCOE
+#undef CONFIG_FCOE_MODULE
+#endif /* IXGBE_FCOE */
+#endif
+typedef irqreturn_t (*new_handler_t)(int, void*);
+static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#else /* 2.4.x */
+typedef void (*irq_handler_t)(int, void*, struct pt_regs *);
+typedef void (*new_handler_t)(int, void*);
+static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#endif /* >= 2.5.x */
+{
+	irq_handler_t new_handler = (irq_handler_t) handler;
+	return request_irq(irq, new_handler, flags, devname, dev_id);
+}
+
+#undef request_irq
+#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id))
+
+#define irq_handler_t new_handler_t
+/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+#define PCIE_CONFIG_SPACE_LEN 256
+#define PCI_CONFIG_SPACE_LEN 64
+#define PCIE_LINK_STATUS 0x12
+#define pci_config_space_ich8lan() do {} while(0)
+#undef pci_save_state
+extern int _kc_pci_save_state(struct pci_dev *);
+#define pci_save_state(pdev) _kc_pci_save_state(pdev)
+#undef pci_restore_state
+extern void _kc_pci_restore_state(struct pci_dev *);
+#define pci_restore_state(pdev) _kc_pci_restore_state(pdev)
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+#undef free_netdev
+extern void _kc_free_netdev(struct net_device *);
+#define free_netdev(netdev) _kc_free_netdev(netdev)
+#endif
+static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+{
+	return 0;
+}
+#define pci_disable_pcie_error_reporting(dev) do {} while (0)
+#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0)
+
+extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp);
+#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp)
+#ifndef bool
+#define bool _Bool
+#define true 1
+#define false 0
+#endif
+#else /* 2.6.19 */
+#include <linux/aer.h>
+#include <linux/string.h>
+#endif /* < 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) )
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) )
+#undef INIT_WORK
+#define INIT_WORK(_work, _func) \
+do { \
+	INIT_LIST_HEAD(&(_work)->entry); \
+	(_work)->pending = 0; \
+	(_work)->func = (void (*)(void *))_func; \
+	(_work)->data = _work; \
+	init_timer(&(_work)->timer); \
+} while (0)
+#endif
+
+#ifndef PCI_VDEVICE
+#define PCI_VDEVICE(ven, dev)        \
+	PCI_VENDOR_ID_##ven, (dev),  \
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0
+#endif
+
+#ifndef PCI_VENDOR_ID_INTEL
+#define PCI_VENDOR_ID_INTEL 0x8086
+#endif
+
+#ifndef round_jiffies
+#define round_jiffies(x) x
+#endif
+
+#define csum_offset csum
+
+#define HAVE_EARLY_VMALLOC_NODE
+#define dev_to_node(dev) -1
+#undef set_dev_node
+/* remove compiler warning with b=b, for unused variable */
+#define set_dev_node(a, b) do { (b) = (b); } while(0)
+
+#if (!(RHEL_RELEASE_CODE && \
+       (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+         (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+        (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \
+     !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+#endif
+
+#if (!(RHEL_RELEASE_CODE && \
+       (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+         (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+        (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \
+     !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+static inline __wsum csum_unfold(__sum16 n)
+{
+	return (__force __wsum)n;
+}
+#endif
+
+#else /* < 2.6.20 */
+#define HAVE_DEVICE_NUMA_NODE
+#endif /* < 2.6.20 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#define NETDEV_CLASS_DEV
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
+#define vlan_group_get_device(vg, id) (vg->vlan_devices[id])
+#define vlan_group_set_device(vg, id, dev)		\
+	do {						\
+		if (vg) vg->vlan_devices[id] = dev;	\
+	} while (0)
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
+#define pci_channel_offline(pdev) (pdev->error_state && \
+	pdev->error_state != pci_channel_io_normal)
+#define pci_request_selected_regions(pdev, bars, name) \
+        pci_request_regions(pdev, name)
+#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev);
+
+#ifndef __aligned
+#define __aligned(x)			__attribute__((aligned(x)))
+#endif
+
+extern struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev);
+#define netdev_to_dev(netdev)	\
+	pci_dev_to_dev(_kc_netdev_to_pdev(netdev))
+#else
+static inline struct device *netdev_to_dev(struct net_device *netdev)
+{
+	return &netdev->dev;
+}
+
+#endif /* < 2.6.21 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define tcp_hdr(skb) (skb->h.th)
+#define tcp_hdrlen(skb) (skb->h.th->doff << 2)
+#define skb_transport_offset(skb) (skb->h.raw - skb->data)
+#define skb_transport_header(skb) (skb->h.raw)
+#define ipv6_hdr(skb) (skb->nh.ipv6h)
+#define ip_hdr(skb) (skb->nh.iph)
+#define skb_network_offset(skb) (skb->nh.raw - skb->data)
+#define skb_network_header(skb) (skb->nh.raw)
+#define skb_tail_pointer(skb) skb->tail
+#define skb_reset_tail_pointer(skb) \
+	do { \
+		skb->tail = skb->data; \
+	} while (0)
+#define skb_set_tail_pointer(skb, offset) \
+	do { \
+		skb->tail = skb->data + offset; \
+	} while (0)
+#define skb_copy_to_linear_data(skb, from, len) \
+				memcpy(skb->data, from, len)
+#define skb_copy_to_linear_data_offset(skb, offset, from, len) \
+				memcpy(skb->data + offset, from, len)
+#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw)
+#define pci_register_driver pci_module_init
+#define skb_mac_header(skb) skb->mac.raw
+
+#ifdef NETIF_F_MULTI_QUEUE
+#ifndef alloc_etherdev_mq
+#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a)
+#endif
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef ETH_FCS_LEN
+#define ETH_FCS_LEN 4
+#endif
+#define cancel_work_sync(x) flush_scheduled_work()
+#ifndef udp_hdr
+#define udp_hdr _udp_hdr
+static inline struct udphdr *_udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb_transport_header(skb);
+}
+#endif
+
+#ifdef cpu_to_be16
+#undef cpu_to_be16
+#endif
+#define cpu_to_be16(x) __constant_htons(x)
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)))
+enum {
+	DUMP_PREFIX_NONE,
+	DUMP_PREFIX_ADDRESS,
+	DUMP_PREFIX_OFFSET
+};
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */
+#ifndef hex_asc
+#define hex_asc(x)	"0123456789abcdef"[x]
+#endif
+#include <linux/ctype.h>
+extern void _kc_print_hex_dump(const char *level, const char *prefix_str,
+			       int prefix_type, int rowsize, int groupsize,
+			       const void *buf, size_t len, bool ascii);
+#define print_hex_dump(lvl, s, t, r, g, b, l, a) \
+		_kc_print_hex_dump(lvl, s, t, r, g, b, l, a)
+#ifndef ADVERTISED_2500baseX_Full
+#define ADVERTISED_2500baseX_Full (1 << 15)
+#endif
+#ifndef SUPPORTED_2500baseX_Full
+#define SUPPORTED_2500baseX_Full (1 << 15)
+#endif
+
+#ifdef HAVE_I2C_SUPPORT
+#include <linux/i2c.h>
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
+struct i2c_board_info {
+	char	driver_name[KOBJ_NAME_LEN];
+	char	type[I2C_NAME_SIZE];
+	unsigned short	flags;
+	unsigned short	addr;
+	void		*platform_data;
+};
+#define I2C_BOARD_INFO(driver, dev_addr) .driver_name = (driver),\
+			.addr = (dev_addr)
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
+#define i2c_new_device(adap, info) _kc_i2c_new_device(adap, info)
+extern struct i2c_client *
+_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info);
+#endif /* HAVE_I2C_SUPPORT */
+
+#else /* 2.6.22 */
+#define ETH_TYPE_TRANS_SETS_DEV
+#define HAVE_NETDEV_STATS_IN_NETDEV
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) )
+#undef SET_MODULE_OWNER
+#define SET_MODULE_OWNER(dev) do { } while (0)
+#endif /* > 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
+#define netif_subqueue_stopped(_a, _b) 0
+#ifndef PTR_ALIGN
+#define PTR_ALIGN(p, a)         ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#endif
+
+#ifndef CONFIG_PM_SLEEP
+#define CONFIG_PM_SLEEP	CONFIG_PM
+#endif
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) )
+#define HAVE_ETHTOOL_GET_PERM_ADDR
+#endif /* 2.6.14 through 2.6.22 */
+#endif /* < 2.6.23 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifndef ETH_FLAG_LRO
+#define ETH_FLAG_LRO NETIF_F_LRO
+#endif
+
+/* if GRO is supported then the napi struct must already exist */
+#ifndef NETIF_F_GRO
+/* NAPI API changes in 2.6.24 break everything */
+struct napi_struct {
+	/* used to look up the real NAPI polling routine */
+	int (*poll)(struct napi_struct *, int);
+	struct net_device *dev;
+	int weight;
+};
+#endif
+
+#ifdef NAPI
+extern int __kc_adapter_clean(struct net_device *, int *);
+extern struct net_device *napi_to_poll_dev(const struct napi_struct *napi);
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+	do { \
+		struct napi_struct *__napi = (_napi); \
+		struct net_device *poll_dev = napi_to_poll_dev(__napi); \
+		poll_dev->poll = &(__kc_adapter_clean); \
+		poll_dev->priv = (_napi); \
+		poll_dev->weight = (_weight); \
+		set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \
+		set_bit(__LINK_STATE_START, &poll_dev->state);\
+		dev_hold(poll_dev); \
+		__napi->poll = &(_poll); \
+		__napi->weight = (_weight); \
+		__napi->dev = (_netdev); \
+	} while (0)
+#define netif_napi_del(_napi) \
+	do { \
+		struct net_device *poll_dev = napi_to_poll_dev(_napi); \
+		WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \
+		dev_put(poll_dev); \
+		memset(poll_dev, 0, sizeof(struct net_device));\
+	} while (0)
+#define napi_schedule_prep(_napi) \
+	(netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi)))
+#define napi_schedule(_napi) \
+	do { \
+		if (napi_schedule_prep(_napi)) \
+			__netif_rx_schedule(napi_to_poll_dev(_napi)); \
+	} while (0)
+#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi))
+#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi))
+#ifdef CONFIG_SMP
+static inline void napi_synchronize(const struct napi_struct *n)
+{
+	struct net_device *dev = napi_to_poll_dev(n);
+
+	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
+		/* No hurry. */
+		msleep(1);
+	}
+}
+#else
+#define napi_synchronize(n)	barrier()
+#endif /* CONFIG_SMP */
+#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi))
+#ifndef NETIF_F_GRO
+#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi))
+#else
+#define napi_complete(_napi) \
+	do { \
+		napi_gro_flush(_napi); \
+		netif_rx_complete(napi_to_poll_dev(_napi)); \
+	} while (0)
+#endif /* NETIF_F_GRO */
+#else /* NAPI */
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+	do { \
+		struct napi_struct *__napi = _napi; \
+		_netdev->poll = &(_poll); \
+		_netdev->weight = (_weight); \
+		__napi->poll = &(_poll); \
+		__napi->weight = (_weight); \
+		__napi->dev = (_netdev); \
+	} while (0)
+#define netif_napi_del(_a) do {} while (0)
+#endif /* NAPI */
+
+#undef dev_get_by_name
+#define dev_get_by_name(_a, _b) dev_get_by_name(_b)
+#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b)
+#ifndef DMA_BIT_MASK
+#define DMA_BIT_MASK(n)	(((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1))
+#endif
+
+#ifdef NETIF_F_TSO6
+#define skb_is_gso_v6 _kc_skb_is_gso_v6
+static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
+}
+#endif /* NETIF_F_TSO6 */
+
+#ifndef KERN_CONT
+#define KERN_CONT	""
+#endif
+#ifndef pr_err
+#define pr_err(fmt, arg...) \
+	printk(KERN_ERR fmt, ##arg)
+#endif
+#else /* < 2.6.24 */
+#define HAVE_ETHTOOL_GET_SSET_COUNT
+#define HAVE_NETDEV_NAPI_LIST
+#endif /* < 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#include <linux/pm_qos_params.h>
+#else /* >= 3.2.0 */
+#include <linux/pm_qos.h>
+#endif /* else >= 3.2.0 */
+#endif /* > 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) )
+#define PM_QOS_CPU_DMA_LATENCY	1
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) )
+#include <linux/latency.h>
+#define PM_QOS_DEFAULT_VALUE	INFINITE_LATENCY
+#define pm_qos_add_requirement(pm_qos_class, name, value) \
+		set_acceptable_latency(name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name) \
+		remove_acceptable_latency(name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) \
+		modify_acceptable_latency(name, value)
+#else
+#define PM_QOS_DEFAULT_VALUE	-1
+#define pm_qos_add_requirement(pm_qos_class, name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) { \
+	if (value != PM_QOS_DEFAULT_VALUE) { \
+		printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \
+			pci_name(adapter->pdev)); \
+	} \
+}
+
+#endif /* > 2.6.18 */
+
+#define pci_enable_device_mem(pdev) pci_enable_device(pdev)
+
+#ifndef DEFINE_PCI_DEVICE_TABLE
+#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[]
+#endif /* DEFINE_PCI_DEVICE_TABLE */
+
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+#ifndef IGB_PROCFS
+#define IGB_PROCFS
+#endif /* IGB_PROCFS */
+#endif /* >= 2.6.0 */
+
+#else /* < 2.6.25 */
+
+
+#if IS_ENABLED(CONFIG_HWMON)
+#ifndef IGB_HWMON
+#define IGB_HWMON
+#endif /* IGB_HWMON */
+#endif /* CONFIG_HWMON */
+
+#endif /* < 2.6.25 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+#ifndef clamp_t
+#define clamp_t(type, val, min, max) ({		\
+	type __val = (val);			\
+	type __min = (min);			\
+	type __max = (max);			\
+	__val = __val < __min ? __min : __val;	\
+	__val > __max ? __max : __val; })
+#endif /* clamp_t */
+#undef kzalloc_node
+#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags)
+
+extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state);
+#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s)
+#else /* < 2.6.26 */
+#include <linux/pci-aspm.h>
+#define HAVE_NETDEV_VLAN_FEATURES
+#ifndef PCI_EXP_LNKCAP_ASPMS
+#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */
+#endif /* PCI_EXP_LNKCAP_ASPMS */
+#endif /* < 2.6.26 */
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep,
+					     __u32 speed)
+{
+	ep->speed = (__u16)speed;
+	/* ep->speed_hi = (__u16)(speed >> 16); */
+}
+#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set
+
+static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep)
+{
+	/* no speed_hi before 2.6.27, and probably no need for it yet */
+	return (__u32)ep->speed;
+}
+#define ethtool_cmd_speed _kc_ethtool_cmd_speed
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) )
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM))
+#define ANCIENT_PM 1
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \
+       defined(CONFIG_PM_SLEEP))
+#define NEWER_PM 1
+#endif
+#if defined(ANCIENT_PM) || defined(NEWER_PM)
+#undef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val) \
+	do { \
+		u16 pmc = 0; \
+		int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \
+		if (pm) { \
+			pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \
+				&pmc); \
+		} \
+		(dev)->power.can_wakeup = !!(pmc >> 11); \
+		(dev)->power.should_wakeup = (val && (pmc >> 11)); \
+	} while (0)
+#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */
+#endif /* 2.6.15 through 2.6.27 */
+#ifndef netif_napi_del
+#define netif_napi_del(_a) do {} while (0)
+#ifdef NAPI
+#ifdef CONFIG_NETPOLL
+#undef netif_napi_del
+#define netif_napi_del(_a) list_del(&(_a)->dev_list);
+#endif
+#endif
+#endif /* netif_napi_del */
+#ifdef dma_mapping_error
+#undef dma_mapping_error
+#endif
+#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr)
+
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#define HAVE_TX_MQ
+#endif
+
+#ifdef HAVE_TX_MQ
+extern void _kc_netif_tx_stop_all_queues(struct net_device *);
+extern void _kc_netif_tx_wake_all_queues(struct net_device *);
+extern void _kc_netif_tx_start_all_queues(struct net_device *);
+#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a)
+#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a)
+#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a)
+#undef netif_stop_subqueue
+#define netif_stop_subqueue(_ndev,_qi) do { \
+	if (netif_is_multiqueue((_ndev))) \
+		netif_stop_subqueue((_ndev), (_qi)); \
+	else \
+		netif_stop_queue((_ndev)); \
+	} while (0)
+#undef netif_start_subqueue
+#define netif_start_subqueue(_ndev,_qi) do { \
+	if (netif_is_multiqueue((_ndev))) \
+		netif_start_subqueue((_ndev), (_qi)); \
+	else \
+		netif_start_queue((_ndev)); \
+	} while (0)
+#else /* HAVE_TX_MQ */
+#define netif_tx_stop_all_queues(a) netif_stop_queue(a)
+#define netif_tx_wake_all_queues(a) netif_wake_queue(a)
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) )
+#define netif_tx_start_all_queues(a) netif_start_queue(a)
+#else
+#define netif_tx_start_all_queues(a) do {} while (0)
+#endif
+#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev))
+#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev))
+#endif /* HAVE_TX_MQ */
+#ifndef NETIF_F_MULTI_QUEUE
+#define NETIF_F_MULTI_QUEUE 0
+#define netif_is_multiqueue(a) 0
+#define netif_wake_subqueue(a, b)
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef __WARN_printf
+extern void __kc_warn_slowpath(const char *file, const int line,
+		const char *fmt, ...) __attribute__((format(printf, 3, 4)));
+#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg)
+#endif /* __WARN_printf */
+
+#ifndef WARN
+#define WARN(condition, format...) ({						\
+	int __ret_warn_on = !!(condition);				\
+	if (unlikely(__ret_warn_on))					\
+		__WARN_printf(format);					\
+	unlikely(__ret_warn_on);					\
+})
+#endif /* WARN */
+#undef HAVE_IXGBE_DEBUG_FS
+#undef HAVE_IGB_DEBUG_FS
+#else /* < 2.6.27 */
+#define HAVE_TX_MQ
+#define HAVE_NETDEV_SELECT_QUEUE
+#ifdef CONFIG_DEBUG_FS
+#define HAVE_IXGBE_DEBUG_FS
+#define HAVE_IGB_DEBUG_FS
+#endif /* CONFIG_DEBUG_FS */
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+#define pci_ioremap_bar(pdev, bar)	ioremap(pci_resource_start(pdev, bar), \
+					        pci_resource_len(pdev, bar))
+#define pci_wake_from_d3 _kc_pci_wake_from_d3
+#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep
+extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable);
+extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev);
+#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC)
+#ifndef __skb_queue_head_init
+static inline void __kc_skb_queue_head_init(struct sk_buff_head *list)
+{
+	list->prev = list->next = (struct sk_buff *)list;
+	list->qlen = 0;
+}
+#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q)
+#endif
+
+#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
+#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
+
+#endif /* < 2.6.28 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
+#ifndef swap
+#define swap(a, b) \
+	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+#endif
+#define pci_request_selected_regions_exclusive(pdev, bars, name) \
+		pci_request_selected_regions(pdev, bars, name)
+#ifndef CONFIG_NR_CPUS
+#define CONFIG_NR_CPUS 1
+#endif /* CONFIG_NR_CPUS */
+#ifndef pcie_aspm_enabled
+#define pcie_aspm_enabled()   (1)
+#endif /* pcie_aspm_enabled */
+
+#define  PCI_EXP_SLTSTA_PDS	0x0040	/* Presence Detect State */
+
+#ifndef pci_clear_master
+extern void _kc_pci_clear_master(struct pci_dev *dev);
+#define pci_clear_master(dev)	_kc_pci_clear_master(dev)
+#endif
+
+#ifndef PCI_EXP_LNKCTL_ASPMC
+#define  PCI_EXP_LNKCTL_ASPMC	0x0003	/* ASPM Control */
+#endif
+#else /* < 2.6.29 */
+#ifndef HAVE_NET_DEVICE_OPS
+#define HAVE_NET_DEVICE_OPS
+#endif
+#ifdef CONFIG_DCB
+#define HAVE_PFC_MODE_ENABLE
+#endif /* CONFIG_DCB */
+#endif /* < 2.6.29 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
+#define skb_rx_queue_recorded(a) false
+#define skb_get_rx_queue(a) 0
+#define skb_record_rx_queue(a, b) do {} while (0)
+#define skb_tx_hash(n, s) ___kc_skb_tx_hash((n), (s), (n)->real_num_tx_queues)
+#ifndef CONFIG_PCI_IOV
+#undef pci_enable_sriov
+#define pci_enable_sriov(a, b) -ENOTSUPP
+#undef pci_disable_sriov
+#define pci_disable_sriov(a) do {} while (0)
+#endif /* CONFIG_PCI_IOV */
+#ifndef pr_cont
+#define pr_cont(fmt, ...) \
+	printk(KERN_CONT fmt, ##__VA_ARGS__)
+#endif /* pr_cont */
+static inline void _kc_synchronize_irq(unsigned int a)
+{
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
+	synchronize_irq();
+#else /* < 2.5.28 */
+	synchronize_irq(a);
+#endif /* < 2.5.28 */
+}
+#undef synchronize_irq
+#define synchronize_irq(a) _kc_synchronize_irq(a)
+
+#define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
+
+#else /* < 2.6.30 */
+#define HAVE_ASPM_QUIRKS
+#endif /* < 2.6.30 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) )
+#define ETH_P_1588 0x88F7
+#define ETH_P_FIP  0x8914
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc_count)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(uclist, dev) \
+	for (uclist = dev->uc_list; uclist; uclist = uclist->next)
+#endif
+#ifndef PORT_OTHER
+#define PORT_OTHER 0xff
+#endif
+#ifndef MDIO_PHY_ID_PRTAD
+#define MDIO_PHY_ID_PRTAD 0x03e0
+#endif
+#ifndef MDIO_PHY_ID_DEVAD
+#define MDIO_PHY_ID_DEVAD 0x001f
+#endif
+#ifndef skb_dst
+#define skb_dst(s) ((s)->dst)
+#endif
+
+#ifndef SUPPORTED_1000baseKX_Full
+#define SUPPORTED_1000baseKX_Full	(1 << 17)
+#endif
+#ifndef SUPPORTED_10000baseKX4_Full
+#define SUPPORTED_10000baseKX4_Full	(1 << 18)
+#endif
+#ifndef SUPPORTED_10000baseKR_Full
+#define SUPPORTED_10000baseKR_Full	(1 << 19)
+#endif
+
+#ifndef ADVERTISED_1000baseKX_Full
+#define ADVERTISED_1000baseKX_Full	(1 << 17)
+#endif
+#ifndef ADVERTISED_10000baseKX4_Full
+#define ADVERTISED_10000baseKX4_Full	(1 << 18)
+#endif
+#ifndef ADVERTISED_10000baseKR_Full
+#define ADVERTISED_10000baseKR_Full	(1 << 19)
+#endif
+
+#else /* < 2.6.31 */
+#ifndef HAVE_NETDEV_STORAGE_ADDRESS
+#define HAVE_NETDEV_STORAGE_ADDRESS
+#endif
+#ifndef HAVE_NETDEV_HW_ADDR
+#define HAVE_NETDEV_HW_ADDR
+#endif
+#ifndef HAVE_TRANS_START_IN_QUEUE
+#define HAVE_TRANS_START_IN_QUEUE
+#endif
+#ifndef HAVE_INCLUDE_LINUX_MDIO_H
+#define HAVE_INCLUDE_LINUX_MDIO_H
+#endif
+#endif /* < 2.6.31 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) )
+#undef netdev_tx_t
+#define netdev_tx_t int
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef NETIF_F_FCOE_MTU
+#define NETIF_F_FCOE_MTU       (1 << 26)
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+static inline int _kc_pm_runtime_get_sync()
+{
+	return 1;
+}
+#define pm_runtime_get_sync(dev)	_kc_pm_runtime_get_sync()
+#else /* 2.6.0 => 2.6.32 */
+static inline int _kc_pm_runtime_get_sync(struct device *dev)
+{
+	return 1;
+}
+#ifndef pm_runtime_get_sync
+#define pm_runtime_get_sync(dev)	_kc_pm_runtime_get_sync(dev)
+#endif
+#endif /* 2.6.0 => 2.6.32 */
+#ifndef pm_runtime_put
+#define pm_runtime_put(dev)		do {} while (0)
+#endif
+#ifndef pm_runtime_put_sync
+#define pm_runtime_put_sync(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_resume
+#define pm_runtime_resume(dev)		do {} while (0)
+#endif
+#ifndef pm_schedule_suspend
+#define pm_schedule_suspend(dev, t)	do {} while (0)
+#endif
+#ifndef pm_runtime_set_suspended
+#define pm_runtime_set_suspended(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_disable
+#define pm_runtime_disable(dev)		do {} while (0)
+#endif
+#ifndef pm_runtime_put_noidle
+#define pm_runtime_put_noidle(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_set_active
+#define pm_runtime_set_active(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_enable
+#define pm_runtime_enable(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_get_noresume
+#define pm_runtime_get_noresume(dev)	do {} while (0)
+#endif
+#else /* < 2.6.32 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
+#define HAVE_NETDEV_OPS_FCOE_ENABLE
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_OPS_GETAPP
+#define HAVE_DCBNL_OPS_GETAPP
+#endif
+#endif /* CONFIG_DCB */
+#include <linux/pm_runtime.h>
+/* IOV bad DMA target work arounds require at least this kernel rev support */
+#define HAVE_PCIE_TYPE
+#endif /* < 2.6.32 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) )
+#ifndef pci_pcie_cap
+#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP)
+#endif
+#ifndef IPV4_FLOW
+#define IPV4_FLOW 0x10
+#endif /* IPV4_FLOW */
+#ifndef IPV6_FLOW
+#define IPV6_FLOW 0x11
+#endif /* IPV6_FLOW */
+/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */
+#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \
+      (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) )
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#endif /* RHEL6 or SLES11 SP1 */
+#ifndef __percpu
+#define __percpu
+#endif /* __percpu */
+#ifndef PORT_DA
+#define PORT_DA PORT_OTHER
+#endif
+#ifndef PORT_NONE
+#define PORT_NONE PORT_OTHER
+#endif
+
+#if ((RHEL_RELEASE_CODE && \
+     (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) && \
+     (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))))
+#if !defined(CONFIG_X86_32) && !defined(CONFIG_NEED_DMA_MAP_STATE)
+#undef DEFINE_DMA_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)	dma_addr_t ADDR_NAME
+#undef DEFINE_DMA_UNMAP_LEN
+#define DEFINE_DMA_UNMAP_LEN(LEN_NAME)		__u32 LEN_NAME
+#undef dma_unmap_addr
+#define dma_unmap_addr(PTR, ADDR_NAME)		((PTR)->ADDR_NAME)
+#undef dma_unmap_addr_set
+#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL)	(((PTR)->ADDR_NAME) = (VAL))
+#undef dma_unmap_len
+#define dma_unmap_len(PTR, LEN_NAME)		((PTR)->LEN_NAME)
+#undef dma_unmap_len_set
+#define dma_unmap_len_set(PTR, LEN_NAME, VAL)	(((PTR)->LEN_NAME) = (VAL))
+#endif /* CONFIG_X86_64 && !CONFIG_NEED_DMA_MAP_STATE */
+#endif /* RHEL_RELEASE_CODE */
+
+#if (!(RHEL_RELEASE_CODE && \
+       (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,8)) && \
+         (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))) || \
+        ((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) && \
+         (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))))))
+static inline bool pci_is_pcie(struct pci_dev *dev)
+{
+	return !!pci_pcie_cap(dev);
+}
+#endif /* RHEL_RELEASE_CODE */
+
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+#if (!(RHEL_RELEASE_CODE && \
+      (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2))))
+#define sk_tx_queue_get(_sk) (-1)
+#define sk_tx_queue_set(_sk, _tx_queue) do {} while(0)
+#endif /* !(RHEL >= 6.2) */
+
+#if (RHEL_RELEASE_CODE && \
+     (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,4)) && \
+     (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT
+#define HAVE_ETHTOOL_SET_PHYS_ID
+#define HAVE_ETHTOOL_GET_TS_INFO
+#endif /* RHEL >= 6.4 && RHEL < 7.0 */
+
+#if (RHEL_RELEASE_CODE && \
+     (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) && \
+     (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
+#define HAVE_RHEL6_NETDEV_OPS_EXT_FDB
+#endif /* RHEL >= 6.5 && RHEL < 7.0 */
+
+#else /* < 2.6.33 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#endif /* < 2.6.33 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
+#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+#ifndef pci_num_vf
+#define pci_num_vf(pdev) _kc_pci_num_vf(pdev)
+extern int _kc_pci_num_vf(struct pci_dev *dev);
+#endif
+#endif /* RHEL_RELEASE_CODE */
+
+#ifndef ETH_FLAG_NTUPLE
+#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE
+#endif
+
+#ifndef netdev_mc_count
+#define netdev_mc_count(dev) ((dev)->mc_count)
+#endif
+#ifndef netdev_mc_empty
+#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_mc_addr
+#define netdev_for_each_mc_addr(mclist, dev) \
+	for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+#endif
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc.count)
+#endif
+#ifndef netdev_uc_empty
+#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(ha, dev) \
+	list_for_each_entry(ha, &dev->uc.list, list)
+#endif
+#ifndef dma_set_coherent_mask
+#define dma_set_coherent_mask(dev,mask) \
+	pci_set_consistent_dma_mask(to_pci_dev(dev),(mask))
+#endif
+#ifndef pci_dev_run_wake
+#define pci_dev_run_wake(pdev)	(0)
+#endif
+
+/* netdev logging taken from include/linux/netdevice.h */
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+	if (dev->reg_state != NETREG_REGISTERED)
+		return "(unregistered net_device)";
+	return dev->name;
+}
+#define netdev_name(netdev)	_kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#undef netdev_printk
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#define netdev_printk(level, netdev, format, args...)		\
+do {								\
+	struct pci_dev *pdev = _kc_netdev_to_pdev(netdev);	\
+	printk(level "%s: " format, pci_name(pdev), ##args);	\
+} while(0)
+#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define netdev_printk(level, netdev, format, args...)		\
+do {								\
+	struct pci_dev *pdev = _kc_netdev_to_pdev(netdev);	\
+	struct device *dev = pci_dev_to_dev(pdev);		\
+	dev_printk(level, dev, "%s: " format,			\
+		   netdev_name(netdev), ##args);		\
+} while(0)
+#else /* 2.6.21 => 2.6.34 */
+#define netdev_printk(level, netdev, format, args...)		\
+	dev_printk(level, (netdev)->dev.parent,			\
+		   "%s: " format,				\
+		   netdev_name(netdev), ##args)
+#endif /* <2.6.0 <2.6.21 <2.6.34 */
+#undef netdev_emerg
+#define netdev_emerg(dev, format, args...)			\
+	netdev_printk(KERN_EMERG, dev, format, ##args)
+#undef netdev_alert
+#define netdev_alert(dev, format, args...)			\
+	netdev_printk(KERN_ALERT, dev, format, ##args)
+#undef netdev_crit
+#define netdev_crit(dev, format, args...)			\
+	netdev_printk(KERN_CRIT, dev, format, ##args)
+#undef netdev_err
+#define netdev_err(dev, format, args...)			\
+	netdev_printk(KERN_ERR, dev, format, ##args)
+#undef netdev_warn
+#define netdev_warn(dev, format, args...)			\
+	netdev_printk(KERN_WARNING, dev, format, ##args)
+#undef netdev_notice
+#define netdev_notice(dev, format, args...)			\
+	netdev_printk(KERN_NOTICE, dev, format, ##args)
+#undef netdev_info
+#define netdev_info(dev, format, args...)			\
+	netdev_printk(KERN_INFO, dev, format, ##args)
+#undef netdev_dbg
+#if defined(DEBUG)
+#define netdev_dbg(__dev, format, args...)			\
+	netdev_printk(KERN_DEBUG, __dev, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define netdev_dbg(__dev, format, args...)			\
+do {								\
+	dynamic_dev_dbg((__dev)->dev.parent, "%s: " format,	\
+			netdev_name(__dev), ##args);		\
+} while (0)
+#else /* DEBUG */
+#define netdev_dbg(__dev, format, args...)			\
+({								\
+	if (0)							\
+		netdev_printk(KERN_DEBUG, __dev, format, ##args); \
+	0;							\
+})
+#endif /* DEBUG */
+
+#undef netif_printk
+#define netif_printk(priv, type, level, dev, fmt, args...)	\
+do {								\
+	if (netif_msg_##type(priv))				\
+		netdev_printk(level, (dev), fmt, ##args);	\
+} while (0)
+
+#undef netif_emerg
+#define netif_emerg(priv, type, dev, fmt, args...)		\
+	netif_level(emerg, priv, type, dev, fmt, ##args)
+#undef netif_alert
+#define netif_alert(priv, type, dev, fmt, args...)		\
+	netif_level(alert, priv, type, dev, fmt, ##args)
+#undef netif_crit
+#define netif_crit(priv, type, dev, fmt, args...)		\
+	netif_level(crit, priv, type, dev, fmt, ##args)
+#undef netif_err
+#define netif_err(priv, type, dev, fmt, args...)		\
+	netif_level(err, priv, type, dev, fmt, ##args)
+#undef netif_warn
+#define netif_warn(priv, type, dev, fmt, args...)		\
+	netif_level(warn, priv, type, dev, fmt, ##args)
+#undef netif_notice
+#define netif_notice(priv, type, dev, fmt, args...)		\
+	netif_level(notice, priv, type, dev, fmt, ##args)
+#undef netif_info
+#define netif_info(priv, type, dev, fmt, args...)		\
+	netif_level(info, priv, type, dev, fmt, ##args)
+#undef netif_dbg
+#define netif_dbg(priv, type, dev, fmt, args...)		\
+	netif_level(dbg, priv, type, dev, fmt, ##args)
+
+#ifdef SET_SYSTEM_SLEEP_PM_OPS
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#endif
+
+#ifndef for_each_set_bit
+#define for_each_set_bit(bit, addr, size) \
+	for ((bit) = find_first_bit((addr), (size)); \
+		(bit) < (size); \
+		(bit) = find_next_bit((addr), (size), (bit) + 1))
+#endif /* for_each_set_bit */
+
+#ifndef DEFINE_DMA_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN
+#define dma_unmap_addr pci_unmap_addr
+#define dma_unmap_addr_set pci_unmap_addr_set
+#define dma_unmap_len pci_unmap_len
+#define dma_unmap_len_set pci_unmap_len_set
+#endif /* DEFINE_DMA_UNMAP_ADDR */
+
+#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,3))
+#ifdef IGB_HWMON
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define sysfs_attr_init(attr)				\
+	do {						\
+		static struct lock_class_key __key;	\
+		(attr)->key = &__key;			\
+	} while (0)
+#else
+#define sysfs_attr_init(attr) do {} while (0)
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* IGB_HWMON */
+#endif /* RHEL_RELEASE_CODE */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+static inline bool _kc_pm_runtime_suspended()
+{
+	return false;
+}
+#define pm_runtime_suspended(dev)	_kc_pm_runtime_suspended()
+#else /* 2.6.0 => 2.6.34 */
+static inline bool _kc_pm_runtime_suspended(struct device *dev)
+{
+	return false;
+}
+#ifndef pm_runtime_suspended
+#define pm_runtime_suspended(dev)	_kc_pm_runtime_suspended(dev)
+#endif
+#endif /* 2.6.0 => 2.6.34 */
+
+#else /* < 2.6.34 */
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#ifndef HAVE_SET_RX_MODE
+#define HAVE_SET_RX_MODE
+#endif
+
+#endif /* < 2.6.34 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+
+ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+				   const void __user *from, size_t count);
+#define simple_write_to_buffer _kc_simple_write_to_buffer
+
+#ifndef numa_node_id
+#define numa_node_id() 0
+#endif
+#ifdef HAVE_TX_MQ
+#include <net/sch_generic.h>
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
+void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int);
+#define netif_set_real_num_tx_queues  _kc_netif_set_real_num_tx_queues
+#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
+#else /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#define netif_set_real_num_tx_queues(_netdev, _count) \
+	do { \
+		(_netdev)->egress_subqueue_count = _count; \
+	} while (0)
+#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#else /* HAVE_TX_MQ */
+#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0)
+#endif /* HAVE_TX_MQ */
+#ifndef ETH_FLAG_RXHASH
+#define ETH_FLAG_RXHASH (1<<28)
+#endif /* ETH_FLAG_RXHASH */
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))
+#define HAVE_IRQ_AFFINITY_HINT
+#endif
+#else /* < 2.6.35 */
+#define HAVE_PM_QOS_REQUEST_LIST
+#define HAVE_IRQ_AFFINITY_HINT
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32);
+#define ethtool_op_set_flags _kc_ethtool_op_set_flags
+extern u32 _kc_ethtool_op_get_flags(struct net_device *);
+#define ethtool_op_get_flags _kc_ethtool_op_get_flags
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#ifdef NET_IP_ALIGN
+#undef NET_IP_ALIGN
+#endif
+#define NET_IP_ALIGN 0
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#ifdef NET_SKB_PAD
+#undef NET_SKB_PAD
+#endif
+
+#if (L1_CACHE_BYTES > 32)
+#define NET_SKB_PAD L1_CACHE_BYTES
+#else
+#define NET_SKB_PAD 32
+#endif
+
+static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev,
+							    unsigned int length)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC);
+	if (skb) {
+#if (NET_IP_ALIGN + NET_SKB_PAD)
+		skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
+#endif
+		skb->dev = dev;
+	}
+	return skb;
+}
+
+#ifdef netdev_alloc_skb_ip_align
+#undef netdev_alloc_skb_ip_align
+#endif
+#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l)
+
+#undef netif_level
+#define netif_level(level, priv, type, dev, fmt, args...)	\
+do {								\
+	if (netif_msg_##type(priv))				\
+		netdev_##level(dev, fmt, ##args);		\
+} while (0)
+
+#undef usleep_range
+#define usleep_range(min, max)	msleep(DIV_ROUND_UP(min, 1000))
+
+#define u64_stats_update_begin(a) do { } while(0)
+#define u64_stats_update_end(a) do { } while(0)
+#define u64_stats_fetch_begin(a) do { } while(0)
+#define u64_stats_fetch_retry_bh(a) (0)
+#define u64_stats_fetch_begin_bh(a) (0)
+
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1))
+#define HAVE_8021P_SUPPORT
+#endif
+
+#else /* < 2.6.36 */
+
+
+#define HAVE_PM_QOS_REQUEST_ACTIVE
+#define HAVE_8021P_SUPPORT
+#define HAVE_NDO_GET_STATS64
+#endif /* < 2.6.36 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) )
+#ifndef netif_set_real_num_rx_queues
+static inline int __kc_netif_set_real_num_rx_queues(struct net_device *dev,
+						    unsigned int rxq)
+{
+	return 0;
+}
+#define netif_set_real_num_rx_queues(dev, rxq) \
+	__kc_netif_set_real_num_rx_queues((dev), (rxq))
+#endif
+#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR
+#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2)
+#endif
+#ifndef VLAN_N_VID
+#define VLAN_N_VID	VLAN_GROUP_ARRAY_LEN
+#endif /* VLAN_N_VID */
+#ifndef ETH_FLAG_TXVLAN
+#define ETH_FLAG_TXVLAN (1 << 7)
+#endif /* ETH_FLAG_TXVLAN */
+#ifndef ETH_FLAG_RXVLAN
+#define ETH_FLAG_RXVLAN (1 << 8)
+#endif /* ETH_FLAG_RXVLAN */
+
+static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb)
+{
+	WARN_ON(skb->ip_summed != CHECKSUM_NONE);
+}
+#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb)
+
+static inline void *_kc_vzalloc_node(unsigned long size, int node)
+{
+	void *addr = vmalloc_node(size, node);
+	if (addr)
+		memset(addr, 0, size);
+	return addr;
+}
+#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node)
+
+static inline void *_kc_vzalloc(unsigned long size)
+{
+	void *addr = vmalloc(size);
+	if (addr)
+		memset(addr, 0, size);
+	return addr;
+}
+#define vzalloc(_size) _kc_vzalloc(_size)
+
+#ifndef vlan_get_protocol
+static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb)
+{
+	if (vlan_tx_tag_present(skb) ||
+	    skb->protocol != cpu_to_be16(ETH_P_8021Q))
+		return skb->protocol;
+
+	if (skb_headlen(skb) < sizeof(struct vlan_ethhdr))
+		return 0;
+
+	return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto;
+}
+#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb)
+#endif
+#ifdef HAVE_HW_TIME_STAMP
+#define SKBTX_HW_TSTAMP (1 << 0)
+#define SKBTX_IN_PROGRESS (1 << 2)
+#define SKB_SHARED_TX_IS_UNION
+#endif
+
+#ifndef device_wakeup_enable
+#define device_wakeup_enable(dev)	device_set_wakeup_enable(dev, true)
+#endif
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) )
+#ifndef HAVE_VLAN_RX_REGISTER
+#define HAVE_VLAN_RX_REGISTER
+#endif
+#endif /* > 2.4.18 */
+#endif /* < 2.6.37 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define skb_checksum_start_offset(skb) skb_transport_offset(skb)
+#else /* 2.6.22 -> 2.6.37 */
+static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb)
+{
+        return skb->csum_start - skb_headroom(skb);
+}
+#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb)
+#endif /* 2.6.22 -> 2.6.37 */
+#ifdef CONFIG_DCB
+#ifndef IEEE_8021QAZ_MAX_TCS
+#define IEEE_8021QAZ_MAX_TCS 8
+#endif
+#ifndef DCB_CAP_DCBX_HOST
+#define DCB_CAP_DCBX_HOST		0x01
+#endif
+#ifndef DCB_CAP_DCBX_LLD_MANAGED
+#define DCB_CAP_DCBX_LLD_MANAGED	0x02
+#endif
+#ifndef DCB_CAP_DCBX_VER_CEE
+#define DCB_CAP_DCBX_VER_CEE		0x04
+#endif
+#ifndef DCB_CAP_DCBX_VER_IEEE
+#define DCB_CAP_DCBX_VER_IEEE		0x08
+#endif
+#ifndef DCB_CAP_DCBX_STATIC
+#define DCB_CAP_DCBX_STATIC		0x10
+#endif
+#endif /* CONFIG_DCB */
+#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2))
+#define CONFIG_XPS
+#endif /* RHEL_RELEASE_VERSION(6,2) */
+#endif /* < 2.6.38 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#ifndef NETIF_F_RXCSUM
+#define NETIF_F_RXCSUM		(1 << 29)
+#endif
+#ifndef skb_queue_reverse_walk_safe
+#define skb_queue_reverse_walk_safe(queue, skb, tmp)				\
+		for (skb = (queue)->prev, tmp = skb->prev;			\
+		     skb != (struct sk_buff *)(queue);				\
+		     skb = tmp, tmp = skb->prev)
+#endif
+#else /* < 2.6.39 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifndef HAVE_MQPRIO
+#define HAVE_MQPRIO
+#endif
+#ifndef HAVE_SETUP_TC
+#define HAVE_SETUP_TC
+#endif
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_IEEE
+#define HAVE_DCBNL_IEEE
+#endif
+#endif /* CONFIG_DCB */
+#ifndef HAVE_NDO_SET_FEATURES
+#define HAVE_NDO_SET_FEATURES
+#endif
+#endif /* < 2.6.39 */
+
+/*****************************************************************************/
+/* use < 2.6.40 because of a Fedora 15 kernel update where they
+ * updated the kernel version to 2.6.40.x and they back-ported 3.0 features
+ * like set_phys_id for ethtool.
+ */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) )
+#ifdef ETHTOOL_GRXRINGS
+#ifndef FLOW_EXT
+#define	FLOW_EXT	0x80000000
+union _kc_ethtool_flow_union {
+	struct ethtool_tcpip4_spec		tcp_ip4_spec;
+	struct ethtool_usrip4_spec		usr_ip4_spec;
+	__u8					hdata[60];
+};
+struct _kc_ethtool_flow_ext {
+	__be16	vlan_etype;
+	__be16	vlan_tci;
+	__be32	data[2];
+};
+struct _kc_ethtool_rx_flow_spec {
+	__u32		flow_type;
+	union _kc_ethtool_flow_union h_u;
+	struct _kc_ethtool_flow_ext h_ext;
+	union _kc_ethtool_flow_union m_u;
+	struct _kc_ethtool_flow_ext m_ext;
+	__u64		ring_cookie;
+	__u32		location;
+};
+#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec
+#endif /* FLOW_EXT */
+#endif
+
+#define pci_disable_link_state_locked pci_disable_link_state
+
+#ifndef PCI_LTR_VALUE_MASK
+#define  PCI_LTR_VALUE_MASK	0x000003ff
+#endif
+#ifndef PCI_LTR_SCALE_MASK
+#define  PCI_LTR_SCALE_MASK	0x00001c00
+#endif
+#ifndef PCI_LTR_SCALE_SHIFT
+#define  PCI_LTR_SCALE_SHIFT	10
+#endif
+
+#else /* < 2.6.40 */
+#define HAVE_ETHTOOL_SET_PHYS_ID
+#endif /* < 2.6.40 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) )
+#define USE_LEGACY_PM_SUPPORT
+#endif /* < 3.0.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#ifndef __netdev_alloc_skb_ip_align
+#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l)
+#endif /* __netdev_alloc_skb_ip_align */
+#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app)
+#define dcb_ieee_delapp(dev, app) 0
+#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority)
+
+/* 1000BASE-T Control register */
+#define CTL1000_AS_MASTER	0x0800
+#define CTL1000_ENABLE_MASTER	0x1000
+
+#else /* < 3.1.0 */
+#ifndef HAVE_DCBNL_IEEE_DELAPP
+#define HAVE_DCBNL_IEEE_DELAPP
+#endif
+#endif /* < 3.1.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#ifdef ETHTOOL_GRXRINGS
+#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+#endif /* ETHTOOL_GRXRINGS */
+
+#ifndef skb_frag_size
+#define skb_frag_size(frag)	_kc_skb_frag_size(frag)
+static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag)
+{
+	return frag->size;
+}
+#endif /* skb_frag_size */
+
+#ifndef skb_frag_size_sub
+#define skb_frag_size_sub(frag, delta)	_kc_skb_frag_size_sub(frag, delta)
+static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta)
+{
+	frag->size -= delta;
+}
+#endif /* skb_frag_size_sub */
+
+#ifndef skb_frag_page
+#define skb_frag_page(frag)	_kc_skb_frag_page(frag)
+static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag)
+{
+	return frag->page;
+}
+#endif /* skb_frag_page */
+
+#ifndef skb_frag_address
+#define skb_frag_address(frag)	_kc_skb_frag_address(frag)
+static inline void *_kc_skb_frag_address(const skb_frag_t *frag)
+{
+	return page_address(skb_frag_page(frag)) + frag->page_offset;
+}
+#endif /* skb_frag_address */
+
+#ifndef skb_frag_dma_map
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+#include <linux/dma-mapping.h>
+#endif
+#define skb_frag_dma_map(dev,frag,offset,size,dir) \
+		_kc_skb_frag_dma_map(dev,frag,offset,size,dir)
+static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev,
+					      const skb_frag_t *frag,
+					      size_t offset, size_t size,
+					      enum dma_data_direction dir)
+{
+	return dma_map_page(dev, skb_frag_page(frag),
+			    frag->page_offset + offset, size, dir);
+}
+#endif /* skb_frag_dma_map */
+
+#ifndef __skb_frag_unref
+#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag)
+static inline void __kc_skb_frag_unref(skb_frag_t *frag)
+{
+	put_page(skb_frag_page(frag));
+}
+#endif /* __skb_frag_unref */
+
+#ifndef SPEED_UNKNOWN
+#define SPEED_UNKNOWN	-1
+#endif
+#ifndef DUPLEX_UNKNOWN
+#define DUPLEX_UNKNOWN	0xff
+#endif
+#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3))
+#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_PCI_DEV_FLAGS_ASSIGNED
+#endif
+#endif
+#else /* < 3.2.0 */
+#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_VF_SPOOFCHK_CONFIGURE
+#endif
+#endif /* < 3.2.0 */
+
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6,2))
+#undef ixgbe_get_netdev_tc_txq
+#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc])
+#endif
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) )
+typedef u32 kni_netdev_features_t;
+#undef PCI_EXP_TYPE_RC_EC
+#define  PCI_EXP_TYPE_RC_EC	0xa	/* Root Complex Event Collector */
+#ifndef CONFIG_BQL
+#define netdev_tx_completed_queue(_q, _p, _b) do {} while (0)
+#define netdev_completed_queue(_n, _p, _b) do {} while (0)
+#define netdev_tx_sent_queue(_q, _b) do {} while (0)
+#define netdev_sent_queue(_n, _b) do {} while (0)
+#define netdev_tx_reset_queue(_q) do {} while (0)
+#define netdev_reset_queue(_n) do {} while (0)
+#endif
+#else /* ! < 3.3.0 */
+typedef netdev_features_t kni_netdev_features_t;
+#define HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef ETHTOOL_SRXNTUPLE
+#undef ETHTOOL_SRXNTUPLE
+#endif
+#endif /* < 3.3.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+#ifndef NETIF_F_RXFCS
+#define NETIF_F_RXFCS	0
+#endif /* NETIF_F_RXFCS */
+#ifndef NETIF_F_RXALL
+#define NETIF_F_RXALL	0
+#endif /* NETIF_F_RXALL */
+
+#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0))
+#define NUMTCS_RETURNS_U8
+
+int _kc_simple_open(struct inode *inode, struct file *file);
+#define simple_open _kc_simple_open
+#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */
+
+
+#ifndef skb_add_rx_frag
+#define skb_add_rx_frag _kc_skb_add_rx_frag
+extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *,
+				int, int, unsigned int);
+#endif
+#ifdef NET_ADDR_RANDOM
+#define eth_hw_addr_random(N) do { \
+	random_ether_addr(N->dev_addr); \
+	N->addr_assign_type |= NET_ADDR_RANDOM; \
+	} while (0)
+#else /* NET_ADDR_RANDOM */
+#define eth_hw_addr_random(N) random_ether_addr(N->dev_addr)
+#endif /* NET_ADDR_RANDOM */
+#else /* < 3.4.0 */
+#include <linux/kconfig.h>
+#endif /* >= 3.4.0 */
+
+/*****************************************************************************/
+#if defined(E1000E_PTP) || defined(IGB_PTP) || defined(IXGBE_PTP) || defined(I40E_PTP)
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) ) && IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+#define HAVE_PTP_1588_CLOCK
+#else
+#error Cannot enable PTP Hardware Clock support due to a pre-3.0 kernel version or CONFIG_PTP_1588_CLOCK not enabled in the kernel
+#endif /* > 3.0.0 && IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+#endif /* E1000E_PTP || IGB_PTP || IXGBE_PTP || I40E_PTP */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) )
+#define skb_tx_timestamp(skb) do {} while (0)
+static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2)
+{
+	return !compare_ether_addr(addr1, addr2);
+}
+#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2))
+#else
+#define HAVE_FDB_OPS
+#define HAVE_ETHTOOL_GET_TS_INFO
+#endif /* < 3.5.0 */
+
+/*****************************************************************************/
+#include <linux/mdio.h>
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) )
+#define PCI_EXP_LNKCAP2		44	/* Link Capability 2 */
+
+#ifndef MDIO_EEE_100TX
+#define MDIO_EEE_100TX		0x0002	/* 100TX EEE cap */
+#endif
+#ifndef MDIO_EEE_1000T
+#define MDIO_EEE_1000T		0x0004	/* 1000T EEE cap */
+#endif
+#ifndef MDIO_EEE_10GT
+#define MDIO_EEE_10GT		0x0008	/* 10GT EEE cap */
+#endif
+#ifndef MDIO_EEE_1000KX
+#define MDIO_EEE_1000KX		0x0010	/* 1000KX EEE cap */
+#endif
+#ifndef MDIO_EEE_10GKX4
+#define MDIO_EEE_10GKX4		0x0020	/* 10G KX4 EEE cap */
+#endif
+#ifndef MDIO_EEE_10GKR
+#define MDIO_EEE_10GKR		0x0040	/* 10G KR EEE cap */
+#endif
+#endif /* < 3.6.0 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
+#ifndef ADVERTISED_40000baseKR4_Full
+/* these defines were all added in one commit, so should be safe
+ * to trigger activiation on one define
+ */
+#define SUPPORTED_40000baseKR4_Full	(1 << 23)
+#define SUPPORTED_40000baseCR4_Full	(1 << 24)
+#define SUPPORTED_40000baseSR4_Full	(1 << 25)
+#define SUPPORTED_40000baseLR4_Full	(1 << 26)
+#define ADVERTISED_40000baseKR4_Full	(1 << 23)
+#define ADVERTISED_40000baseCR4_Full	(1 << 24)
+#define ADVERTISED_40000baseSR4_Full	(1 << 25)
+#define ADVERTISED_40000baseLR4_Full	(1 << 26)
+#endif
+
+/**
+ * mmd_eee_cap_to_ethtool_sup_t
+ * @eee_cap: value of the MMD EEE Capability register
+ *
+ * A small helper function that translates MMD EEE Capability (3.20) bits
+ * to ethtool supported settings.
+ */
+static inline u32 __kc_mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap)
+{
+	u32 supported = 0;
+
+	if (eee_cap & MDIO_EEE_100TX)
+		supported |= SUPPORTED_100baseT_Full;
+	if (eee_cap & MDIO_EEE_1000T)
+		supported |= SUPPORTED_1000baseT_Full;
+	if (eee_cap & MDIO_EEE_10GT)
+		supported |= SUPPORTED_10000baseT_Full;
+	if (eee_cap & MDIO_EEE_1000KX)
+		supported |= SUPPORTED_1000baseKX_Full;
+	if (eee_cap & MDIO_EEE_10GKX4)
+		supported |= SUPPORTED_10000baseKX4_Full;
+	if (eee_cap & MDIO_EEE_10GKR)
+		supported |= SUPPORTED_10000baseKR_Full;
+
+	return supported;
+}
+#define mmd_eee_cap_to_ethtool_sup_t(eee_cap) \
+	__kc_mmd_eee_cap_to_ethtool_sup_t(eee_cap)
+
+/**
+ * mmd_eee_adv_to_ethtool_adv_t
+ * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers
+ *
+ * A small helper function that translates the MMD EEE Advertisement (7.60)
+ * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement
+ * settings.
+ */
+static inline u32 __kc_mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv)
+{
+	u32 adv = 0;
+
+	if (eee_adv & MDIO_EEE_100TX)
+		adv |= ADVERTISED_100baseT_Full;
+	if (eee_adv & MDIO_EEE_1000T)
+		adv |= ADVERTISED_1000baseT_Full;
+	if (eee_adv & MDIO_EEE_10GT)
+		adv |= ADVERTISED_10000baseT_Full;
+	if (eee_adv & MDIO_EEE_1000KX)
+		adv |= ADVERTISED_1000baseKX_Full;
+	if (eee_adv & MDIO_EEE_10GKX4)
+		adv |= ADVERTISED_10000baseKX4_Full;
+	if (eee_adv & MDIO_EEE_10GKR)
+		adv |= ADVERTISED_10000baseKR_Full;
+
+	return adv;
+}
+#define mmd_eee_adv_to_ethtool_adv_t(eee_adv) \
+	__kc_mmd_eee_adv_to_ethtool_adv_t(eee_adv)
+
+/**
+ * ethtool_adv_to_mmd_eee_adv_t
+ * @adv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement settings
+ * to EEE advertisements for the MMD EEE Advertisement (7.60) and
+ * MMD EEE Link Partner Ability (7.61) registers.
+ */
+static inline u16 __kc_ethtool_adv_to_mmd_eee_adv_t(u32 adv)
+{
+	u16 reg = 0;
+
+	if (adv & ADVERTISED_100baseT_Full)
+		reg |= MDIO_EEE_100TX;
+	if (adv & ADVERTISED_1000baseT_Full)
+		reg |= MDIO_EEE_1000T;
+	if (adv & ADVERTISED_10000baseT_Full)
+		reg |= MDIO_EEE_10GT;
+	if (adv & ADVERTISED_1000baseKX_Full)
+		reg |= MDIO_EEE_1000KX;
+	if (adv & ADVERTISED_10000baseKX4_Full)
+		reg |= MDIO_EEE_10GKX4;
+	if (adv & ADVERTISED_10000baseKR_Full)
+		reg |= MDIO_EEE_10GKR;
+
+	return reg;
+}
+#define ethtool_adv_to_mmd_eee_adv_t(adv) \
+	__kc_ethtool_adv_to_mmd_eee_adv_t(adv)
+
+#ifndef pci_pcie_type
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+static inline u8 pci_pcie_type(struct pci_dev *pdev)
+{
+	int pos;
+	u16 reg16;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (!pos)
+		BUG();
+	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+	return (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+#else /* < 2.6.24 */
+#define pci_pcie_type(x)	(x)->pcie_type
+#endif /* < 2.6.24 */
+#endif /* pci_pcie_type */
+
+#define ptp_clock_register(caps, args...) ptp_clock_register(caps)
+
+#ifndef PCI_EXP_LNKSTA2
+int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
+#define pcie_capability_read_word(d,p,v) __kc_pcie_capability_read_word(d,p,v)
+int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val);
+#define pcie_capability_write_word(d,p,v) __kc_pcie_capability_write_word(d,p,v)
+int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
+					    u16 clear, u16 set);
+#define pcie_capability_clear_and_set_word(d,p,c,s) \
+	__kc_pcie_capability_clear_and_set_word(d,p,c,s)
+
+#define PCI_EXP_LNKSTA2		50	/* Link Status 2 */
+
+static inline int pcie_capability_clear_word(struct pci_dev *dev, int pos,
+					     u16 clear)
+{
+	return __kc_pcie_capability_clear_and_set_word(dev, pos, clear, 0);
+}
+#endif /* !PCI_EXP_LNKSTA2 */
+
+#if (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0))
+#define USE_CONST_DEV_UC_CHAR
+#endif
+
+#else /* >= 3.7.0 */
+#define HAVE_CONST_STRUCT_PCI_ERROR_HANDLERS
+#define USE_CONST_DEV_UC_CHAR
+#endif /* >= 3.7.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) )
+#ifndef PCI_EXP_LNKCTL_ASPM_L0S
+#define  PCI_EXP_LNKCTL_ASPM_L0S  0x01	/* L0s Enable */
+#endif
+#ifndef PCI_EXP_LNKCTL_ASPM_L1
+#define  PCI_EXP_LNKCTL_ASPM_L1   0x02	/* L1 Enable */
+#endif
+#define HAVE_CONFIG_HOTPLUG
+/* Reserved Ethernet Addresses per IEEE 802.1Q */
+static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = {
+	0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) &&\
+    !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
+static inline bool is_link_local_ether_addr(const u8 *addr)
+{
+	__be16 *a = (__be16 *)addr;
+	static const __be16 *b = (const __be16 *)eth_reserved_addr_base;
+	static const __be16 m = cpu_to_be16(0xfff0);
+
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
+}
+#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */
+#else /* >= 3.8.0 */
+#ifndef __devinit
+#define __devinit
+#define HAVE_ENCAP_CSUM_OFFLOAD
+#endif
+
+#ifndef __devinitdata
+#define __devinitdata
+#endif
+
+#ifndef __devexit
+#define __devexit
+#endif
+
+#ifndef __devexit_p
+#define __devexit_p
+#endif
+
+#ifndef HAVE_SRIOV_CONFIGURE
+#define HAVE_SRIOV_CONFIGURE
+#endif
+
+#define HAVE_BRIDGE_ATTRIBS
+#ifndef BRIDGE_MODE_VEB
+#define BRIDGE_MODE_VEB		0	/* Default loopback mode */
+#endif /* BRIDGE_MODE_VEB */
+#ifndef BRIDGE_MODE_VEPA
+#define BRIDGE_MODE_VEPA	1	/* 802.1Qbg defined VEPA mode */
+#endif /* BRIDGE_MODE_VEPA */
+#endif /* >= 3.8.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
+
+#undef hlist_entry
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#undef hlist_entry_safe
+#define hlist_entry_safe(ptr, type, member) \
+	(ptr) ? hlist_entry(ptr, type, member) : NULL
+
+#undef hlist_for_each_entry
+#define hlist_for_each_entry(pos, head, member)                             \
+	for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
+	     pos;                                                           \
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+#undef hlist_for_each_entry_safe
+#define hlist_for_each_entry_safe(pos, n, head, member)		    \
+	for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);   \
+	     pos && ({ n = pos->member.next; 1; });			    \
+	     pos = hlist_entry_safe(n, typeof(*pos), member))
+
+#ifdef CONFIG_XPS
+extern int __kc_netif_set_xps_queue(struct net_device *, struct cpumask *, u16);
+#define netif_set_xps_queue(_dev, _mask, _idx) __kc_netif_set_xps_queue((_dev), (_mask), (_idx))
+#else /* CONFIG_XPS */
+#define netif_set_xps_queue(_dev, _mask, _idx) do {} while (0)
+#endif /* CONFIG_XPS */
+
+#ifdef HAVE_NETDEV_SELECT_QUEUE
+#define _kc_hashrnd 0xd631614b /* not so random hash salt */
+extern u16 __kc_netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
+#define __netdev_pick_tx __kc_netdev_pick_tx
+#endif /* HAVE_NETDEV_SELECT_QUEUE */
+#else
+#define HAVE_BRIDGE_FILTER
+#define USE_DEFAULT_FDB_DEL_DUMP
+#endif /* < 3.9.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+extern int __kc_pci_vfs_assigned(struct pci_dev *dev);
+#else
+static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+	return 0;
+}
+#endif
+#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev)
+
+#ifndef VLAN_TX_COOKIE_MAGIC
+static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb,
+							 u16 vlan_tci)
+{
+#ifdef VLAN_TAG_PRESENT
+	vlan_tci |= VLAN_TAG_PRESENT;
+#endif
+	skb->vlan_tci = vlan_tci;
+        return skb;
+}
+#define __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci) \
+	__kc__vlan_hwaccel_put_tag(skb, vlan_tci)
+#endif
+
+#else /* >= 3.10.0 */
+#define HAVE_ENCAP_TSO_OFFLOAD
+#endif /* >= 3.10.0 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) )
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,6)))
+#if (!(UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(3,13,0,30,0) \
+    && (UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(12,4) \
+     || UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(14,4))))
+#if (!(SLE_VERSION_CODE == SLE_VERSION(12,0,0)))
+#ifdef NETIF_F_RXHASH
+#define PKT_HASH_TYPE_L3 0
+static inline void
+skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
+{
+	skb->rxhash = hash;
+}
+#endif /* NETIF_F_RXHASH */
+#endif /* < SLES12 */
+#endif /* < 3.13.0-30.54 (Ubuntu 14.04) */
+#endif /* < RHEL7 */
+#endif /* < 3.14.0 */
+
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) \
+    || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#undef SET_ETHTOOL_OPS
+#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
+#define HAVE_VF_MIN_MAX_TXRATE 1
+#endif /* >= 3.16.0 */
+
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
+    || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
+#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#define HAVE_NDO_FDB_ADD_VID
+#endif /* !RHEL 7.2 */
+#endif /* >= 3.19.0 */
+
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) ) \
+    || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+/* vlan_tx_xx functions got renamed to skb_vlan */
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
+#endif /* !RHEL 7.2 */
+#endif /* 4.0.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) )
+/* ndo_bridge_getlink adds new nlflags parameter */
+#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
+#endif /* >= 4.1.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) )
+/* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */
+#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
+#endif /* >= 4.2.0 */
+#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
new file mode 100644
index 00000000..e1a89388
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
@@ -0,0 +1,1171 @@
+/*******************************************************************************
+
+  Intel(R) Gigabit Ethernet Linux driver
+  Copyright(c) 2007-2013 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/*
+ * net/core/ethtool.c - Ethtool ioctl handler
+ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
+ *
+ * This file is where we call all the ethtool_ops commands to get
+ * the information ethtool needs.  We fall back to calling do_ioctl()
+ * for drivers which haven't been converted to ethtool_ops yet.
+ *
+ * It's GPL, stupid.
+ *
+ * Modification by sfeldma@pobox.com to work as backward compat
+ * solution for pre-ethtool_ops kernels.
+ *	- copied struct ethtool_ops from ethtool.h
+ *	- defined SET_ETHTOOL_OPS
+ *	- put in some #ifndef NETIF_F_xxx wrappers
+ *	- changes refs to dev->ethtool_ops to ethtool_ops
+ *	- changed dev_ethtool to ethtool_ioctl
+ *      - remove EXPORT_SYMBOL()s
+ *      - added _kc_ prefix in built-in ethtool_op_xxx ops.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <asm/uaccess.h>
+
+#include "kcompat.h"
+
+#undef SUPPORTED_10000baseT_Full
+#define SUPPORTED_10000baseT_Full	(1 << 12)
+#undef ADVERTISED_10000baseT_Full
+#define ADVERTISED_10000baseT_Full	(1 << 12)
+#undef SPEED_10000
+#define SPEED_10000		10000
+
+#undef ethtool_ops
+#define ethtool_ops _kc_ethtool_ops
+
+struct _kc_ethtool_ops {
+	int  (*get_settings)(struct net_device *, struct ethtool_cmd *);
+	int  (*set_settings)(struct net_device *, struct ethtool_cmd *);
+	void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
+	int  (*get_regs_len)(struct net_device *);
+	void (*get_regs)(struct net_device *, struct ethtool_regs *, void *);
+	void (*get_wol)(struct net_device *, struct ethtool_wolinfo *);
+	int  (*set_wol)(struct net_device *, struct ethtool_wolinfo *);
+	u32  (*get_msglevel)(struct net_device *);
+	void (*set_msglevel)(struct net_device *, u32);
+	int  (*nway_reset)(struct net_device *);
+	u32  (*get_link)(struct net_device *);
+	int  (*get_eeprom_len)(struct net_device *);
+	int  (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
+	int  (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
+	int  (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
+	int  (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
+	void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
+	int  (*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
+	void (*get_pauseparam)(struct net_device *,
+	                       struct ethtool_pauseparam*);
+	int  (*set_pauseparam)(struct net_device *,
+	                       struct ethtool_pauseparam*);
+	u32  (*get_rx_csum)(struct net_device *);
+	int  (*set_rx_csum)(struct net_device *, u32);
+	u32  (*get_tx_csum)(struct net_device *);
+	int  (*set_tx_csum)(struct net_device *, u32);
+	u32  (*get_sg)(struct net_device *);
+	int  (*set_sg)(struct net_device *, u32);
+	u32  (*get_tso)(struct net_device *);
+	int  (*set_tso)(struct net_device *, u32);
+	int  (*self_test_count)(struct net_device *);
+	void (*self_test)(struct net_device *, struct ethtool_test *, u64 *);
+	void (*get_strings)(struct net_device *, u32 stringset, u8 *);
+	int  (*phys_id)(struct net_device *, u32);
+	int  (*get_stats_count)(struct net_device *);
+	void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *,
+	                          u64 *);
+} *ethtool_ops = NULL;
+
+#undef SET_ETHTOOL_OPS
+#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops))
+
+/*
+ * Some useful ethtool_ops methods that are device independent. If we find that
+ * all drivers want to do the same thing here, we can turn these into dev_()
+ * function calls.
+ */
+
+#undef ethtool_op_get_link
+#define ethtool_op_get_link _kc_ethtool_op_get_link
+u32 _kc_ethtool_op_get_link(struct net_device *dev)
+{
+	return netif_carrier_ok(dev) ? 1 : 0;
+}
+
+#undef ethtool_op_get_tx_csum
+#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum
+u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev)
+{
+#ifdef NETIF_F_IP_CSUM
+	return (dev->features & NETIF_F_IP_CSUM) != 0;
+#else
+	return 0;
+#endif
+}
+
+#undef ethtool_op_set_tx_csum
+#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum
+int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+#ifdef NETIF_F_IP_CSUM
+	if (data)
+#ifdef NETIF_F_IPV6_CSUM
+		dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+	else
+		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+#else
+		dev->features |= NETIF_F_IP_CSUM;
+	else
+		dev->features &= ~NETIF_F_IP_CSUM;
+#endif
+#endif
+
+	return 0;
+}
+
+#undef ethtool_op_get_sg
+#define ethtool_op_get_sg _kc_ethtool_op_get_sg
+u32 _kc_ethtool_op_get_sg(struct net_device *dev)
+{
+#ifdef NETIF_F_SG
+	return (dev->features & NETIF_F_SG) != 0;
+#else
+	return 0;
+#endif
+}
+
+#undef ethtool_op_set_sg
+#define ethtool_op_set_sg _kc_ethtool_op_set_sg
+int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data)
+{
+#ifdef NETIF_F_SG
+	if (data)
+		dev->features |= NETIF_F_SG;
+	else
+		dev->features &= ~NETIF_F_SG;
+#endif
+
+	return 0;
+}
+
+#undef ethtool_op_get_tso
+#define ethtool_op_get_tso _kc_ethtool_op_get_tso
+u32 _kc_ethtool_op_get_tso(struct net_device *dev)
+{
+#ifdef NETIF_F_TSO
+	return (dev->features & NETIF_F_TSO) != 0;
+#else
+	return 0;
+#endif
+}
+
+#undef ethtool_op_set_tso
+#define ethtool_op_set_tso _kc_ethtool_op_set_tso
+int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data)
+{
+#ifdef NETIF_F_TSO
+	if (data)
+		dev->features |= NETIF_F_TSO;
+	else
+		dev->features &= ~NETIF_F_TSO;
+#endif
+
+	return 0;
+}
+
+/* Handlers for each ethtool command */
+
+static int ethtool_get_settings(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_cmd cmd = { ETHTOOL_GSET };
+	int err;
+
+	if (!ethtool_ops->get_settings)
+		return -EOPNOTSUPP;
+
+	err = ethtool_ops->get_settings(dev, &cmd);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_settings(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_cmd cmd;
+
+	if (!ethtool_ops->set_settings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return ethtool_ops->set_settings(dev, &cmd);
+}
+
+static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_drvinfo info;
+	struct ethtool_ops *ops = ethtool_ops;
+
+	if (!ops->get_drvinfo)
+		return -EOPNOTSUPP;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GDRVINFO;
+	ops->get_drvinfo(dev, &info);
+
+	if (ops->self_test_count)
+		info.testinfo_len = ops->self_test_count(dev);
+	if (ops->get_stats_count)
+		info.n_stats = ops->get_stats_count(dev);
+	if (ops->get_regs_len)
+		info.regdump_len = ops->get_regs_len(dev);
+	if (ops->get_eeprom_len)
+		info.eedump_len = ops->get_eeprom_len(dev);
+
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_regs(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_regs regs;
+	struct ethtool_ops *ops = ethtool_ops;
+	void *regbuf;
+	int reglen, ret;
+
+	if (!ops->get_regs || !ops->get_regs_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&regs, useraddr, sizeof(regs)))
+		return -EFAULT;
+
+	reglen = ops->get_regs_len(dev);
+	if (regs.len > reglen)
+		regs.len = reglen;
+
+	regbuf = kmalloc(reglen, GFP_USER);
+	if (!regbuf)
+		return -ENOMEM;
+
+	ops->get_regs(dev, &regs, regbuf);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &regs, sizeof(regs)))
+		goto out;
+	useraddr += offsetof(struct ethtool_regs, data);
+	if (copy_to_user(useraddr, regbuf, reglen))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(regbuf);
+	return ret;
+}
+
+static int ethtool_get_wol(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+
+	if (!ethtool_ops->get_wol)
+		return -EOPNOTSUPP;
+
+	ethtool_ops->get_wol(dev, &wol);
+
+	if (copy_to_user(useraddr, &wol, sizeof(wol)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_wol(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_wolinfo wol;
+
+	if (!ethtool_ops->set_wol)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&wol, useraddr, sizeof(wol)))
+		return -EFAULT;
+
+	return ethtool_ops->set_wol(dev, &wol);
+}
+
+static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GMSGLVL };
+
+	if (!ethtool_ops->get_msglevel)
+		return -EOPNOTSUPP;
+
+	edata.data = ethtool_ops->get_msglevel(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!ethtool_ops->set_msglevel)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	ethtool_ops->set_msglevel(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_nway_reset(struct net_device *dev)
+{
+	if (!ethtool_ops->nway_reset)
+		return -EOPNOTSUPP;
+
+	return ethtool_ops->nway_reset(dev);
+}
+
+static int ethtool_get_link(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GLINK };
+
+	if (!ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	edata.data = ethtool_ops->get_link(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	struct ethtool_ops *ops = ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(eeprom.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+		goto out;
+
+	ret = ops->get_eeprom(dev, &eeprom, data);
+	if (ret)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
+		goto out;
+	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	struct ethtool_ops *ops = ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->set_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(eeprom.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+		goto out;
+
+	ret = ops->set_eeprom(dev, &eeprom, data);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+		ret = -EFAULT;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
+
+	if (!ethtool_ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	ethtool_ops->get_coalesce(dev, &coalesce);
+
+	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_coalesce coalesce;
+
+	if (!ethtool_ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
+		return -EFAULT;
+
+	return ethtool_ops->set_coalesce(dev, &coalesce);
+}
+
+static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
+
+	if (!ethtool_ops->get_ringparam)
+		return -EOPNOTSUPP;
+
+	ethtool_ops->get_ringparam(dev, &ringparam);
+
+	if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_ringparam ringparam;
+
+	if (!ethtool_ops->get_ringparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
+		return -EFAULT;
+
+	return ethtool_ops->set_ringparam(dev, &ringparam);
+}
+
+static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
+
+	if (!ethtool_ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	ethtool_ops->get_pauseparam(dev, &pauseparam);
+
+	if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_pauseparam pauseparam;
+
+	if (!ethtool_ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
+		return -EFAULT;
+
+	return ethtool_ops->set_pauseparam(dev, &pauseparam);
+}
+
+static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GRXCSUM };
+
+	if (!ethtool_ops->get_rx_csum)
+		return -EOPNOTSUPP;
+
+	edata.data = ethtool_ops->get_rx_csum(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!ethtool_ops->set_rx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	ethtool_ops->set_rx_csum(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GTXCSUM };
+
+	if (!ethtool_ops->get_tx_csum)
+		return -EOPNOTSUPP;
+
+	edata.data = ethtool_ops->get_tx_csum(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!ethtool_ops->set_tx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	return ethtool_ops->set_tx_csum(dev, edata.data);
+}
+
+static int ethtool_get_sg(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GSG };
+
+	if (!ethtool_ops->get_sg)
+		return -EOPNOTSUPP;
+
+	edata.data = ethtool_ops->get_sg(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_sg(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!ethtool_ops->set_sg)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	return ethtool_ops->set_sg(dev, edata.data);
+}
+
+static int ethtool_get_tso(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GTSO };
+
+	if (!ethtool_ops->get_tso)
+		return -EOPNOTSUPP;
+
+	edata.data = ethtool_ops->get_tso(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_tso(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!ethtool_ops->set_tso)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	return ethtool_ops->set_tso(dev, edata.data);
+}
+
+static int ethtool_self_test(struct net_device *dev, char *useraddr)
+{
+	struct ethtool_test test;
+	struct ethtool_ops *ops = ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->self_test || !ops->self_test_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&test, useraddr, sizeof(test)))
+		return -EFAULT;
+
+	test.len = ops->self_test_count(dev);
+	data = kmalloc(test.len * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->self_test(dev, &test, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &test, sizeof(test)))
+		goto out;
+	useraddr += sizeof(test);
+	if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_strings(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	struct ethtool_ops *ops = ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_strings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	switch (gstrings.string_set) {
+	case ETH_SS_TEST:
+		if (!ops->self_test_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->self_test_count(dev);
+		break;
+	case ETH_SS_STATS:
+		if (!ops->get_stats_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->get_stats_count(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_strings(dev, gstrings.string_set, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_phys_id(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_value id;
+
+	if (!ethtool_ops->phys_id)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&id, useraddr, sizeof(id)))
+		return -EFAULT;
+
+	return ethtool_ops->phys_id(dev, id.data);
+}
+
+static int ethtool_get_stats(struct net_device *dev, void *useraddr)
+{
+	struct ethtool_stats stats;
+	struct ethtool_ops *ops = ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->get_ethtool_stats || !ops->get_stats_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&stats, useraddr, sizeof(stats)))
+		return -EFAULT;
+
+	stats.n_stats = ops->get_stats_count(dev);
+	data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_ethtool_stats(dev, &stats, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &stats, sizeof(stats)))
+		goto out;
+	useraddr += sizeof(stats);
+	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+/* The main entry point in this file.  Called from net/core/dev.c */
+
+#define ETHTOOL_OPS_COMPAT
+int ethtool_ioctl(struct ifreq *ifr)
+{
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+	void *useraddr = (void *) ifr->ifr_data;
+	u32 ethcmd;
+
+	/*
+	 * XXX: This can be pushed down into the ethtool_* handlers that
+	 * need it.  Keep existing behavior for the moment.
+	 */
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!dev || !netif_device_present(dev))
+		return -ENODEV;
+
+	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+		return -EFAULT;
+
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
+		return ethtool_get_settings(dev, useraddr);
+	case ETHTOOL_SSET:
+		return ethtool_set_settings(dev, useraddr);
+	case ETHTOOL_GDRVINFO:
+		return ethtool_get_drvinfo(dev, useraddr);
+	case ETHTOOL_GREGS:
+		return ethtool_get_regs(dev, useraddr);
+	case ETHTOOL_GWOL:
+		return ethtool_get_wol(dev, useraddr);
+	case ETHTOOL_SWOL:
+		return ethtool_set_wol(dev, useraddr);
+	case ETHTOOL_GMSGLVL:
+		return ethtool_get_msglevel(dev, useraddr);
+	case ETHTOOL_SMSGLVL:
+		return ethtool_set_msglevel(dev, useraddr);
+	case ETHTOOL_NWAY_RST:
+		return ethtool_nway_reset(dev);
+	case ETHTOOL_GLINK:
+		return ethtool_get_link(dev, useraddr);
+	case ETHTOOL_GEEPROM:
+		return ethtool_get_eeprom(dev, useraddr);
+	case ETHTOOL_SEEPROM:
+		return ethtool_set_eeprom(dev, useraddr);
+	case ETHTOOL_GCOALESCE:
+		return ethtool_get_coalesce(dev, useraddr);
+	case ETHTOOL_SCOALESCE:
+		return ethtool_set_coalesce(dev, useraddr);
+	case ETHTOOL_GRINGPARAM:
+		return ethtool_get_ringparam(dev, useraddr);
+	case ETHTOOL_SRINGPARAM:
+		return ethtool_set_ringparam(dev, useraddr);
+	case ETHTOOL_GPAUSEPARAM:
+		return ethtool_get_pauseparam(dev, useraddr);
+	case ETHTOOL_SPAUSEPARAM:
+		return ethtool_set_pauseparam(dev, useraddr);
+	case ETHTOOL_GRXCSUM:
+		return ethtool_get_rx_csum(dev, useraddr);
+	case ETHTOOL_SRXCSUM:
+		return ethtool_set_rx_csum(dev, useraddr);
+	case ETHTOOL_GTXCSUM:
+		return ethtool_get_tx_csum(dev, useraddr);
+	case ETHTOOL_STXCSUM:
+		return ethtool_set_tx_csum(dev, useraddr);
+	case ETHTOOL_GSG:
+		return ethtool_get_sg(dev, useraddr);
+	case ETHTOOL_SSG:
+		return ethtool_set_sg(dev, useraddr);
+	case ETHTOOL_GTSO:
+		return ethtool_get_tso(dev, useraddr);
+	case ETHTOOL_STSO:
+		return ethtool_set_tso(dev, useraddr);
+	case ETHTOOL_TEST:
+		return ethtool_self_test(dev, useraddr);
+	case ETHTOOL_GSTRINGS:
+		return ethtool_get_strings(dev, useraddr);
+	case ETHTOOL_PHYS_ID:
+		return ethtool_phys_id(dev, useraddr);
+	case ETHTOOL_GSTATS:
+		return ethtool_get_stats(dev, useraddr);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+#define mii_if_info _kc_mii_if_info
+struct _kc_mii_if_info {
+	int phy_id;
+	int advertising;
+	int phy_id_mask;
+	int reg_num_mask;
+
+	unsigned int full_duplex : 1;	/* is full duplex? */
+	unsigned int force_media : 1;	/* is autoneg. disabled? */
+
+	struct net_device *dev;
+	int (*mdio_read) (struct net_device *dev, int phy_id, int location);
+	void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val);
+};
+
+struct ethtool_cmd;
+struct mii_ioctl_data;
+
+#undef mii_link_ok
+#define mii_link_ok _kc_mii_link_ok
+#undef mii_nway_restart
+#define mii_nway_restart _kc_mii_nway_restart
+#undef mii_ethtool_gset
+#define mii_ethtool_gset _kc_mii_ethtool_gset
+#undef mii_ethtool_sset
+#define mii_ethtool_sset _kc_mii_ethtool_sset
+#undef mii_check_link
+#define mii_check_link _kc_mii_check_link
+extern int _kc_mii_link_ok (struct mii_if_info *mii);
+extern int _kc_mii_nway_restart (struct mii_if_info *mii);
+extern int _kc_mii_ethtool_gset(struct mii_if_info *mii,
+                                struct ethtool_cmd *ecmd);
+extern int _kc_mii_ethtool_sset(struct mii_if_info *mii,
+                                struct ethtool_cmd *ecmd);
+extern void _kc_mii_check_link (struct mii_if_info *mii);
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
+#undef generic_mii_ioctl
+#define generic_mii_ioctl _kc_generic_mii_ioctl
+extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
+                                 struct mii_ioctl_data *mii_data, int cmd,
+                                 unsigned int *duplex_changed);
+#endif /* > 2.4.6 */
+
+
+struct _kc_pci_dev_ext {
+	struct pci_dev *dev;
+	void *pci_drvdata;
+	struct pci_driver *driver;
+};
+
+struct _kc_net_dev_ext {
+	struct net_device *dev;
+	unsigned int carrier;
+};
+
+
+/**************************************/
+/* mii support */
+
+int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
+{
+	struct net_device *dev = mii->dev;
+	u32 advert, bmcr, lpa, nego;
+
+	ecmd->supported =
+	    (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
+	     SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+	     SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
+
+	/* only supports twisted-pair */
+	ecmd->port = PORT_MII;
+
+	/* only supports internal transceiver */
+	ecmd->transceiver = XCVR_INTERNAL;
+
+	/* this isn't fully supported at higher layers */
+	ecmd->phy_address = mii->phy_id;
+
+	ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
+	advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
+	if (advert & ADVERTISE_10HALF)
+		ecmd->advertising |= ADVERTISED_10baseT_Half;
+	if (advert & ADVERTISE_10FULL)
+		ecmd->advertising |= ADVERTISED_10baseT_Full;
+	if (advert & ADVERTISE_100HALF)
+		ecmd->advertising |= ADVERTISED_100baseT_Half;
+	if (advert & ADVERTISE_100FULL)
+		ecmd->advertising |= ADVERTISED_100baseT_Full;
+
+	bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+	lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA);
+	if (bmcr & BMCR_ANENABLE) {
+		ecmd->advertising |= ADVERTISED_Autoneg;
+		ecmd->autoneg = AUTONEG_ENABLE;
+
+		nego = mii_nway_result(advert & lpa);
+		if (nego == LPA_100FULL || nego == LPA_100HALF)
+			ecmd->speed = SPEED_100;
+		else
+			ecmd->speed = SPEED_10;
+		if (nego == LPA_100FULL || nego == LPA_10FULL) {
+			ecmd->duplex = DUPLEX_FULL;
+			mii->full_duplex = 1;
+		} else {
+			ecmd->duplex = DUPLEX_HALF;
+			mii->full_duplex = 0;
+		}
+	} else {
+		ecmd->autoneg = AUTONEG_DISABLE;
+
+		ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
+		ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
+	}
+
+	/* ignore maxtxpkt, maxrxpkt for now */
+
+	return 0;
+}
+
+int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
+{
+	struct net_device *dev = mii->dev;
+
+	if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100)
+		return -EINVAL;
+	if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
+		return -EINVAL;
+	if (ecmd->port != PORT_MII)
+		return -EINVAL;
+	if (ecmd->transceiver != XCVR_INTERNAL)
+		return -EINVAL;
+	if (ecmd->phy_address != mii->phy_id)
+		return -EINVAL;
+	if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE)
+		return -EINVAL;
+
+	/* ignore supported, maxtxpkt, maxrxpkt */
+
+	if (ecmd->autoneg == AUTONEG_ENABLE) {
+		u32 bmcr, advert, tmp;
+
+		if ((ecmd->advertising & (ADVERTISED_10baseT_Half |
+					  ADVERTISED_10baseT_Full |
+					  ADVERTISED_100baseT_Half |
+					  ADVERTISED_100baseT_Full)) == 0)
+			return -EINVAL;
+
+		/* advertise only what has been requested */
+		advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
+		tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
+		if (ADVERTISED_10baseT_Half)
+			tmp |= ADVERTISE_10HALF;
+		if (ADVERTISED_10baseT_Full)
+			tmp |= ADVERTISE_10FULL;
+		if (ADVERTISED_100baseT_Half)
+			tmp |= ADVERTISE_100HALF;
+		if (ADVERTISED_100baseT_Full)
+			tmp |= ADVERTISE_100FULL;
+		if (advert != tmp) {
+			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
+			mii->advertising = tmp;
+		}
+
+		/* turn on autonegotiation, and force a renegotiate */
+		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+		bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+		mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
+
+		mii->force_media = 0;
+	} else {
+		u32 bmcr, tmp;
+
+		/* turn off auto negotiation, set speed and duplexity */
+		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+		tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX);
+		if (ecmd->speed == SPEED_100)
+			tmp |= BMCR_SPEED100;
+		if (ecmd->duplex == DUPLEX_FULL) {
+			tmp |= BMCR_FULLDPLX;
+			mii->full_duplex = 1;
+		} else
+			mii->full_duplex = 0;
+		if (bmcr != tmp)
+			mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
+
+		mii->force_media = 1;
+	}
+	return 0;
+}
+
+int _kc_mii_link_ok (struct mii_if_info *mii)
+{
+	/* first, a dummy read, needed to latch some MII phys */
+	mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR);
+	if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS)
+		return 1;
+	return 0;
+}
+
+int _kc_mii_nway_restart (struct mii_if_info *mii)
+{
+	int bmcr;
+	int r = -EINVAL;
+
+	/* if autoneg is off, it's an error */
+	bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
+
+	if (bmcr & BMCR_ANENABLE) {
+		bmcr |= BMCR_ANRESTART;
+		mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr);
+		r = 0;
+	}
+
+	return r;
+}
+
+void _kc_mii_check_link (struct mii_if_info *mii)
+{
+	int cur_link = mii_link_ok(mii);
+	int prev_link = netif_carrier_ok(mii->dev);
+
+	if (cur_link && !prev_link)
+		netif_carrier_on(mii->dev);
+	else if (prev_link && !cur_link)
+		netif_carrier_off(mii->dev);
+}
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
+int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
+                          struct mii_ioctl_data *mii_data, int cmd,
+                          unsigned int *duplex_chg_out)
+{
+	int rc = 0;
+	unsigned int duplex_changed = 0;
+
+	if (duplex_chg_out)
+		*duplex_chg_out = 0;
+
+	mii_data->phy_id &= mii_if->phy_id_mask;
+	mii_data->reg_num &= mii_if->reg_num_mask;
+
+	switch(cmd) {
+	case SIOCDEVPRIVATE:	/* binary compat, remove in 2.5 */
+	case SIOCGMIIPHY:
+		mii_data->phy_id = mii_if->phy_id;
+		/* fall through */
+
+	case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */
+	case SIOCGMIIREG:
+		mii_data->val_out =
+			mii_if->mdio_read(mii_if->dev, mii_data->phy_id,
+					  mii_data->reg_num);
+		break;
+
+	case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */
+	case SIOCSMIIREG: {
+		u16 val = mii_data->val_in;
+
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		if (mii_data->phy_id == mii_if->phy_id) {
+			switch(mii_data->reg_num) {
+			case MII_BMCR: {
+				unsigned int new_duplex = 0;
+				if (val & (BMCR_RESET|BMCR_ANENABLE))
+					mii_if->force_media = 0;
+				else
+					mii_if->force_media = 1;
+				if (mii_if->force_media &&
+				    (val & BMCR_FULLDPLX))
+					new_duplex = 1;
+				if (mii_if->full_duplex != new_duplex) {
+					duplex_changed = 1;
+					mii_if->full_duplex = new_duplex;
+				}
+				break;
+			}
+			case MII_ADVERTISE:
+				mii_if->advertising = val;
+				break;
+			default:
+				/* do nothing */
+				break;
+			}
+		}
+
+		mii_if->mdio_write(mii_if->dev, mii_data->phy_id,
+				   mii_data->reg_num, val);
+		break;
+	}
+
+	default:
+		rc = -EOPNOTSUPP;
+		break;
+	}
+
+	if ((rc == 0) && (duplex_chg_out) && (duplex_changed))
+		*duplex_chg_out = 1;
+
+	return rc;
+}
+#endif /* > 2.4.6 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
new file mode 100644
index 00000000..5f297e5b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
@@ -0,0 +1,339 @@
+
+"This software program is licensed subject to the GNU General Public License 
+(GPL). Version 2, June 1991, available at 
+<http://www.fsf.org/copyleft/gpl.html>"
+
+GNU General Public License 
+
+Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.  
+59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to 
+share and change it. By contrast, the GNU General Public License is intended
+to guarantee your freedom to share and change free software--to make sure 
+the software is free for all its users. This General Public License applies 
+to most of the Free Software Foundation's software and to any other program 
+whose authors commit to using it. (Some other Free Software Foundation 
+software is covered by the GNU Library General Public License instead.) You 
+can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom 
+to distribute copies of free software (and charge for this service if you 
+wish), that you receive source code or can get it if you want it, that you 
+can change the software or use pieces of it in new free programs; and that 
+you know you can do these things.
+
+To protect your rights, we need to make restrictions that forbid anyone to 
+deny you these rights or to ask you to surrender the rights. These 
+restrictions translate to certain responsibilities for you if you distribute
+copies of the software, or if you modify it.
+
+For example, if you distribute copies of such a program, whether gratis or 
+for a fee, you must give the recipients all the rights that you have. You 
+must make sure that they, too, receive or can get the source code. And you 
+must show them these terms so they know their rights.
+ 
+We protect your rights with two steps: (1) copyright the software, and (2) 
+offer you this license which gives you legal permission to copy, distribute 
+and/or modify the software. 
+
+Also, for each author's protection and ours, we want to make certain that 
+everyone understands that there is no warranty for this free software. If 
+the software is modified by someone else and passed on, we want its 
+recipients to know that what they have is not the original, so that any 
+problems introduced by others will not reflect on the original authors' 
+reputations. 
+
+Finally, any free program is threatened constantly by software patents. We 
+wish to avoid the danger that redistributors of a free program will 
+individually obtain patent licenses, in effect making the program 
+proprietary. To prevent this, we have made it clear that any patent must be 
+licensed for everyone's free use or not licensed at all. 
+
+The precise terms and conditions for copying, distribution and modification 
+follow. 
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License applies to any program or other work which contains a notice
+   placed by the copyright holder saying it may be distributed under the 
+   terms of this General Public License. The "Program", below, refers to any
+   such program or work, and a "work based on the Program" means either the 
+   Program or any derivative work under copyright law: that is to say, a 
+   work containing the Program or a portion of it, either verbatim or with 
+   modifications and/or translated into another language. (Hereinafter, 
+   translation is included without limitation in the term "modification".) 
+   Each licensee is addressed as "you". 
+
+   Activities other than copying, distribution and modification are not 
+   covered by this License; they are outside its scope. The act of running 
+   the Program is not restricted, and the output from the Program is covered 
+   only if its contents constitute a work based on the Program (independent 
+   of having been made by running the Program). Whether that is true depends
+   on what the Program does. 
+
+1. You may copy and distribute verbatim copies of the Program's source code 
+   as you receive it, in any medium, provided that you conspicuously and 
+   appropriately publish on each copy an appropriate copyright notice and 
+   disclaimer of warranty; keep intact all the notices that refer to this 
+   License and to the absence of any warranty; and give any other recipients 
+   of the Program a copy of this License along with the Program. 
+
+   You may charge a fee for the physical act of transferring a copy, and you 
+   may at your option offer warranty protection in exchange for a fee. 
+
+2. You may modify your copy or copies of the Program or any portion of it, 
+   thus forming a work based on the Program, and copy and distribute such 
+   modifications or work under the terms of Section 1 above, provided that 
+   you also meet all of these conditions: 
+
+   * a) You must cause the modified files to carry prominent notices stating 
+        that you changed the files and the date of any change. 
+
+   * b) You must cause any work that you distribute or publish, that in 
+        whole or in part contains or is derived from the Program or any part 
+        thereof, to be licensed as a whole at no charge to all third parties
+        under the terms of this License. 
+
+   * c) If the modified program normally reads commands interactively when 
+        run, you must cause it, when started running for such interactive 
+        use in the most ordinary way, to print or display an announcement 
+        including an appropriate copyright notice and a notice that there is
+        no warranty (or else, saying that you provide a warranty) and that 
+        users may redistribute the program under these conditions, and 
+        telling the user how to view a copy of this License. (Exception: if 
+        the Program itself is interactive but does not normally print such 
+        an announcement, your work based on the Program is not required to 
+        print an announcement.) 
+
+   These requirements apply to the modified work as a whole. If identifiable 
+   sections of that work are not derived from the Program, and can be 
+   reasonably considered independent and separate works in themselves, then 
+   this License, and its terms, do not apply to those sections when you 
+   distribute them as separate works. But when you distribute the same 
+   sections as part of a whole which is a work based on the Program, the 
+   distribution of the whole must be on the terms of this License, whose 
+   permissions for other licensees extend to the entire whole, and thus to 
+   each and every part regardless of who wrote it. 
+
+   Thus, it is not the intent of this section to claim rights or contest 
+   your rights to work written entirely by you; rather, the intent is to 
+   exercise the right to control the distribution of derivative or 
+   collective works based on the Program. 
+
+   In addition, mere aggregation of another work not based on the Program 
+   with the Program (or with a work based on the Program) on a volume of a 
+   storage or distribution medium does not bring the other work under the 
+   scope of this License. 
+
+3. You may copy and distribute the Program (or a work based on it, under 
+   Section 2) in object code or executable form under the terms of Sections 
+   1 and 2 above provided that you also do one of the following: 
+
+   * a) Accompany it with the complete corresponding machine-readable source 
+        code, which must be distributed under the terms of Sections 1 and 2 
+        above on a medium customarily used for software interchange; or, 
+
+   * b) Accompany it with a written offer, valid for at least three years, 
+        to give any third party, for a charge no more than your cost of 
+        physically performing source distribution, a complete machine-
+        readable copy of the corresponding source code, to be distributed 
+        under the terms of Sections 1 and 2 above on a medium customarily 
+        used for software interchange; or, 
+
+   * c) Accompany it with the information you received as to the offer to 
+        distribute corresponding source code. (This alternative is allowed 
+        only for noncommercial distribution and only if you received the 
+        program in object code or executable form with such an offer, in 
+        accord with Subsection b above.) 
+
+   The source code for a work means the preferred form of the work for 
+   making modifications to it. For an executable work, complete source code 
+   means all the source code for all modules it contains, plus any 
+   associated interface definition files, plus the scripts used to control 
+   compilation and installation of the executable. However, as a special 
+   exception, the source code distributed need not include anything that is 
+   normally distributed (in either source or binary form) with the major 
+   components (compiler, kernel, and so on) of the operating system on which
+   the executable runs, unless that component itself accompanies the 
+   executable. 
+
+   If distribution of executable or object code is made by offering access 
+   to copy from a designated place, then offering equivalent access to copy 
+   the source code from the same place counts as distribution of the source 
+   code, even though third parties are not compelled to copy the source 
+   along with the object code. 
+
+4. You may not copy, modify, sublicense, or distribute the Program except as
+   expressly provided under this License. Any attempt otherwise to copy, 
+   modify, sublicense or distribute the Program is void, and will 
+   automatically terminate your rights under this License. However, parties 
+   who have received copies, or rights, from you under this License will not
+   have their licenses terminated so long as such parties remain in full 
+   compliance. 
+
+5. You are not required to accept this License, since you have not signed 
+   it. However, nothing else grants you permission to modify or distribute 
+   the Program or its derivative works. These actions are prohibited by law 
+   if you do not accept this License. Therefore, by modifying or 
+   distributing the Program (or any work based on the Program), you 
+   indicate your acceptance of this License to do so, and all its terms and
+   conditions for copying, distributing or modifying the Program or works 
+   based on it. 
+
+6. Each time you redistribute the Program (or any work based on the 
+   Program), the recipient automatically receives a license from the 
+   original licensor to copy, distribute or modify the Program subject to 
+   these terms and conditions. You may not impose any further restrictions 
+   on the recipients' exercise of the rights granted herein. You are not 
+   responsible for enforcing compliance by third parties to this License. 
+
+7. If, as a consequence of a court judgment or allegation of patent 
+   infringement or for any other reason (not limited to patent issues), 
+   conditions are imposed on you (whether by court order, agreement or 
+   otherwise) that contradict the conditions of this License, they do not 
+   excuse you from the conditions of this License. If you cannot distribute 
+   so as to satisfy simultaneously your obligations under this License and 
+   any other pertinent obligations, then as a consequence you may not 
+   distribute the Program at all. For example, if a patent license would 
+   not permit royalty-free redistribution of the Program by all those who 
+   receive copies directly or indirectly through you, then the only way you 
+   could satisfy both it and this License would be to refrain entirely from 
+   distribution of the Program. 
+
+   If any portion of this section is held invalid or unenforceable under any
+   particular circumstance, the balance of the section is intended to apply
+   and the section as a whole is intended to apply in other circumstances. 
+
+   It is not the purpose of this section to induce you to infringe any 
+   patents or other property right claims or to contest validity of any 
+   such claims; this section has the sole purpose of protecting the 
+   integrity of the free software distribution system, which is implemented 
+   by public license practices. Many people have made generous contributions
+   to the wide range of software distributed through that system in 
+   reliance on consistent application of that system; it is up to the 
+   author/donor to decide if he or she is willing to distribute software 
+   through any other system and a licensee cannot impose that choice. 
+
+   This section is intended to make thoroughly clear what is believed to be 
+   a consequence of the rest of this License. 
+
+8. If the distribution and/or use of the Program is restricted in certain 
+   countries either by patents or by copyrighted interfaces, the original 
+   copyright holder who places the Program under this License may add an 
+   explicit geographical distribution limitation excluding those countries, 
+   so that distribution is permitted only in or among countries not thus 
+   excluded. In such case, this License incorporates the limitation as if 
+   written in the body of this License. 
+
+9. The Free Software Foundation may publish revised and/or new versions of 
+   the General Public License from time to time. Such new versions will be 
+   similar in spirit to the present version, but may differ in detail to 
+   address new problems or concerns. 
+
+   Each version is given a distinguishing version number. If the Program 
+   specifies a version number of this License which applies to it and "any 
+   later version", you have the option of following the terms and 
+   conditions either of that version or of any later version published by 
+   the Free Software Foundation. If the Program does not specify a version 
+   number of this License, you may choose any version ever published by the 
+   Free Software Foundation. 
+
+10. If you wish to incorporate parts of the Program into other free programs
+    whose distribution conditions are different, write to the author to ask 
+    for permission. For software which is copyrighted by the Free Software 
+    Foundation, write to the Free Software Foundation; we sometimes make 
+    exceptions for this. Our decision will be guided by the two goals of 
+    preserving the free status of all derivatives of our free software and 
+    of promoting the sharing and reuse of software generally. 
+
+   NO WARRANTY
+
+11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 
+    FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 
+    OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 
+    PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER 
+    EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 
+    ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH 
+    YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL 
+    NECESSARY SERVICING, REPAIR OR CORRECTION. 
+
+12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 
+    WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 
+    REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR 
+    DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL 
+    DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM 
+    (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED 
+    INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF 
+    THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR 
+    OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest 
+possible use to the public, the best way to achieve this is to make it free 
+software which everyone can redistribute and change under these terms. 
+
+To do so, attach the following notices to the program. It is safest to 
+attach them to the start of each source file to most effectively convey the
+exclusion of warranty; and each file should have at least the "copyright" 
+line and a pointer to where the full notice is found. 
+
+one line to give the program's name and an idea of what it does.
+Copyright (C) yyyy  name of author
+
+This program is free software; you can redistribute it and/or modify it 
+under the terms of the GNU General Public License as published by the Free 
+Software Foundation; either version 2 of the License, or (at your option) 
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT 
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 
+Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+Also add information on how to contact you by electronic and paper mail. 
+
+If the program is interactive, make it output a short notice like this when 
+it starts in an interactive mode: 
+
+Gnomovision version 69, Copyright (C) year name of author Gnomovision comes 
+with ABSOLUTELY NO WARRANTY; for details type 'show w'.  This is free 
+software, and you are welcome to redistribute it under certain conditions; 
+type 'show c' for details.
+
+The hypothetical commands 'show w' and 'show c' should show the appropriate 
+parts of the General Public License. Of course, the commands you use may be 
+called something other than 'show w' and 'show c'; they could even be 
+mouse-clicks or menu items--whatever suits your program. 
+
+You should also get your employer (if you work as a programmer) or your 
+school, if any, to sign a "copyright disclaimer" for the program, if 
+necessary. Here is a sample; alter the names: 
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program 
+'Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+signature of Ty Coon, 1 April 1989
+Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into 
+proprietary programs. If your program is a subroutine library, you may 
+consider it more useful to permit linking proprietary applications with the 
+library. If this is what you want to do, use the GNU Library General Public 
+License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
new file mode 100644
index 00000000..222c2c71
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
@@ -0,0 +1,925 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_H_
+#define _IXGBE_H_
+
+#ifndef IXGBE_NO_LRO
+#include <net/tcp.h>
+#endif
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#ifdef HAVE_IRQ_AFFINITY_HINT
+#include <linux/cpumask.h>
+#endif /* HAVE_IRQ_AFFINITY_HINT */
+#include <linux/vmalloc.h>
+
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+#ifdef NETIF_F_HW_VLAN_TX
+#include <linux/if_vlan.h>
+#endif
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+#define IXGBE_DCA
+#include <linux/dca.h>
+#endif
+#include "ixgbe_dcb.h"
+
+#include "kcompat.h"
+
+#ifdef HAVE_SCTP
+#include <linux/sctp.h>
+#endif
+
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#define IXGBE_FCOE
+#include "ixgbe_fcoe.h"
+#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
+
+#if defined(CONFIG_PTP_1588_CLOCK) || defined(CONFIG_PTP_1588_CLOCK_MODULE)
+#define HAVE_IXGBE_PTP
+#endif
+
+#include "ixgbe_api.h"
+
+#define PFX "ixgbe: "
+#define DPRINTK(nlevel, klevel, fmt, args...) \
+	((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
+	printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
+		__func__ , ## args)))
+
+/* TX/RX descriptor defines */
+#define IXGBE_DEFAULT_TXD		512
+#define IXGBE_DEFAULT_TX_WORK		256
+#define IXGBE_MAX_TXD			4096
+#define IXGBE_MIN_TXD			64
+
+#define IXGBE_DEFAULT_RXD		512
+#define IXGBE_DEFAULT_RX_WORK		256
+#define IXGBE_MAX_RXD			4096
+#define IXGBE_MIN_RXD			64
+
+
+/* flow control */
+#define IXGBE_MIN_FCRTL			0x40
+#define IXGBE_MAX_FCRTL			0x7FF80
+#define IXGBE_MIN_FCRTH			0x600
+#define IXGBE_MAX_FCRTH			0x7FFF0
+#define IXGBE_DEFAULT_FCPAUSE		0xFFFF
+#define IXGBE_MIN_FCPAUSE		0
+#define IXGBE_MAX_FCPAUSE		0xFFFF
+
+/* Supported Rx Buffer Sizes */
+#define IXGBE_RXBUFFER_512	512    /* Used for packet split */
+#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+#define IXGBE_RXBUFFER_1536	1536
+#define IXGBE_RXBUFFER_2K	2048
+#define IXGBE_RXBUFFER_3K	3072
+#define IXGBE_RXBUFFER_4K	4096
+#define IXGBE_RXBUFFER_7K	7168
+#define IXGBE_RXBUFFER_8K	8192
+#define IXGBE_RXBUFFER_15K	15360
+#endif /* CONFIG_IXGBE_DISABLE_PACKET_SPLIT */
+#define IXGBE_MAX_RXBUFFER	16384  /* largest size for single descriptor */
+
+/*
+ * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN mans we
+ * reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
+ * this adds up to 512 bytes of extra data meaning the smallest allocation
+ * we could have is 1K.
+ * i.e. RXBUFFER_512 --> size-1024 slab
+ */
+#define IXGBE_RX_HDR_SIZE	IXGBE_RXBUFFER_512
+
+#define MAXIMUM_ETHERNET_VLAN_SIZE	(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IXGBE_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+
+#define IXGBE_TX_FLAGS_CSUM		(u32)(1)
+#define IXGBE_TX_FLAGS_HW_VLAN		(u32)(1 << 1)
+#define IXGBE_TX_FLAGS_SW_VLAN		(u32)(1 << 2)
+#define IXGBE_TX_FLAGS_TSO		(u32)(1 << 3)
+#define IXGBE_TX_FLAGS_IPV4		(u32)(1 << 4)
+#define IXGBE_TX_FLAGS_FCOE		(u32)(1 << 5)
+#define IXGBE_TX_FLAGS_FSO		(u32)(1 << 6)
+#define IXGBE_TX_FLAGS_TXSW		(u32)(1 << 7)
+#define IXGBE_TX_FLAGS_TSTAMP		(u32)(1 << 8)
+#define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT	29
+#define IXGBE_TX_FLAGS_VLAN_SHIFT	16
+
+#define IXGBE_MAX_RX_DESC_POLL		10
+
+#define IXGBE_MAX_VF_MC_ENTRIES		30
+#define IXGBE_MAX_VF_FUNCTIONS		64
+#define IXGBE_MAX_VFTA_ENTRIES		128
+#define MAX_EMULATION_MAC_ADDRS		16
+#define IXGBE_MAX_PF_MACVLANS		15
+#define IXGBE_82599_VF_DEVICE_ID	0x10ED
+#define IXGBE_X540_VF_DEVICE_ID		0x1515
+
+#ifdef CONFIG_PCI_IOV
+#define VMDQ_P(p)	((p) + adapter->num_vfs)
+#else
+#define VMDQ_P(p)	(p)
+#endif
+
+#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter)	\
+	{							\
+		u32 current_counter = IXGBE_READ_REG(hw, reg);	\
+		if (current_counter < last_counter)		\
+			counter += 0x100000000LL;		\
+		last_counter = current_counter;			\
+		counter &= 0xFFFFFFFF00000000LL;		\
+		counter |= current_counter;			\
+	}
+
+#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \
+	{								 \
+		u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb);	 \
+		u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb);	 \
+		u64 current_counter = (current_counter_msb << 32) |	 \
+			current_counter_lsb;				 \
+		if (current_counter < last_counter)			 \
+			counter += 0x1000000000LL;			 \
+		last_counter = current_counter;				 \
+		counter &= 0xFFFFFFF000000000LL;			 \
+		counter |= current_counter;				 \
+	}
+
+struct vf_stats {
+	u64 gprc;
+	u64 gorc;
+	u64 gptc;
+	u64 gotc;
+	u64 mprc;
+};
+
+struct vf_data_storage {
+	unsigned char vf_mac_addresses[ETH_ALEN];
+	u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
+	u16 num_vf_mc_hashes;
+	u16 default_vf_vlan_id;
+	u16 vlans_enabled;
+	bool clear_to_send;
+	struct vf_stats vfstats;
+	struct vf_stats last_vfstats;
+	struct vf_stats saved_rst_vfstats;
+	bool pf_set_mac;
+	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+	u16 pf_qos;
+	u16 tx_rate;
+	u16 vlan_count;
+	u8 spoofchk_enabled;
+	struct pci_dev *vfdev;
+};
+
+struct vf_macvlans {
+	struct list_head l;
+	int vf;
+	bool free;
+	bool is_macvlan;
+	u8 vf_macvlan[ETH_ALEN];
+};
+
+#ifndef IXGBE_NO_LRO
+#define IXGBE_LRO_MAX		32	/*Maximum number of LRO descriptors*/
+#define IXGBE_LRO_GLOBAL	10
+
+struct ixgbe_lro_stats {
+	u32 flushed;
+	u32 coal;
+};
+
+/*
+ * ixgbe_lro_header - header format to be aggregated by LRO
+ * @iph: IP header without options
+ * @tcp: TCP header
+ * @ts:  Optional TCP timestamp data in TCP options
+ *
+ * This structure relies on the check above that verifies that the header
+ * is IPv4 and does not contain any options.
+ */
+struct ixgbe_lrohdr {
+	struct iphdr iph;
+	struct tcphdr th;
+	__be32 ts[0];
+};
+
+struct ixgbe_lro_list {
+	struct sk_buff_head active;
+	struct ixgbe_lro_stats stats;
+};
+
+#endif /* IXGBE_NO_LRO */
+#define IXGBE_MAX_TXD_PWR	14
+#define IXGBE_MAX_DATA_PER_TXD	(1 << IXGBE_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD)
+#ifdef MAX_SKB_FRAGS
+#define DESC_NEEDED	((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+#else
+#define DESC_NEEDED	4
+#endif
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct ixgbe_tx_buffer {
+	union ixgbe_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	struct sk_buff *skb;
+	unsigned int bytecount;
+	unsigned short gso_segs;
+	__be16 protocol;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct ixgbe_rx_buffer {
+	struct sk_buff *skb;
+	dma_addr_t dma;
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+	struct page *page;
+	unsigned int page_offset;
+#endif
+};
+
+struct ixgbe_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct ixgbe_tx_queue_stats {
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_done_old;
+};
+
+struct ixgbe_rx_queue_stats {
+	u64 rsc_count;
+	u64 rsc_flush;
+	u64 non_eop_descs;
+	u64 alloc_rx_page_failed;
+	u64 alloc_rx_buff_failed;
+	u64 csum_err;
+};
+
+enum ixgbe_ring_state_t {
+	__IXGBE_TX_FDIR_INIT_DONE,
+	__IXGBE_TX_DETECT_HANG,
+	__IXGBE_HANG_CHECK_ARMED,
+	__IXGBE_RX_RSC_ENABLED,
+#ifndef HAVE_NDO_SET_FEATURES
+	__IXGBE_RX_CSUM_ENABLED,
+#endif
+	__IXGBE_RX_CSUM_UDP_ZERO_ERR,
+#ifdef IXGBE_FCOE
+	__IXGBE_RX_FCOE_BUFSZ,
+#endif
+};
+
+#define check_for_tx_hang(ring) \
+	test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+	set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+	clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#ifndef IXGBE_NO_HW_RSC
+#define ring_is_rsc_enabled(ring) \
+	test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#else
+#define ring_is_rsc_enabled(ring)	false
+#endif
+#define set_ring_rsc_enabled(ring) \
+	set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define clear_ring_rsc_enabled(ring) \
+	clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define netdev_ring(ring) (ring->netdev)
+#define ring_queue_index(ring) (ring->queue_index)
+
+
+struct ixgbe_ring {
+	struct ixgbe_ring *next;	/* pointer to next ring in q_vector */
+	struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
+	struct net_device *netdev;	/* netdev ring belongs to */
+	struct device *dev;		/* device for DMA mapping */
+	void *desc;			/* descriptor ring memory */
+	union {
+		struct ixgbe_tx_buffer *tx_buffer_info;
+		struct ixgbe_rx_buffer *rx_buffer_info;
+	};
+	unsigned long state;
+	u8 __iomem *tail;
+	dma_addr_t dma;			/* phys. address of descriptor ring */
+	unsigned int size;		/* length in bytes */
+
+	u16 count;			/* amount of descriptors */
+
+	u8 queue_index; /* needed for multiqueue queue management */
+	u8 reg_idx;			/* holds the special value that gets
+					 * the hardware register offset
+					 * associated with this ring, which is
+					 * different for DCB and RSS modes
+					 */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	union {
+#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+		u16 rx_buf_len;
+#else
+		u16 next_to_alloc;
+#endif
+		struct {
+			u8 atr_sample_rate;
+			u8 atr_count;
+		};
+	};
+
+	u8 dcb_tc;
+	struct ixgbe_queue_stats stats;
+	union {
+		struct ixgbe_tx_queue_stats tx_stats;
+		struct ixgbe_rx_queue_stats rx_stats;
+	};
+} ____cacheline_internodealigned_in_smp;
+
+enum ixgbe_ring_f_enum {
+	RING_F_NONE = 0,
+	RING_F_VMDQ,  /* SR-IOV uses the same ring feature */
+	RING_F_RSS,
+	RING_F_FDIR,
+#ifdef IXGBE_FCOE
+	RING_F_FCOE,
+#endif /* IXGBE_FCOE */
+	RING_F_ARRAY_SIZE  /* must be last in enum set */
+};
+
+#define IXGBE_MAX_DCB_INDICES	8
+#define IXGBE_MAX_RSS_INDICES	16
+#define IXGBE_MAX_VMDQ_INDICES	64
+#define IXGBE_MAX_FDIR_INDICES	64
+#ifdef IXGBE_FCOE
+#define IXGBE_MAX_FCOE_INDICES	8
+#define MAX_RX_QUEUES	(IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES)
+#define MAX_TX_QUEUES	(IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES)
+#else
+#define MAX_RX_QUEUES	IXGBE_MAX_FDIR_INDICES
+#define MAX_TX_QUEUES	IXGBE_MAX_FDIR_INDICES
+#endif /* IXGBE_FCOE */
+struct ixgbe_ring_feature {
+	int indices;
+	int mask;
+};
+
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+/*
+ * FCoE requires that all Rx buffers be over 2200 bytes in length.  Since
+ * this is twice the size of a half page we need to double the page order
+ * for FCoE enabled Rx queues.
+ */
+#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192)
+static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
+{
+	return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0;
+}
+#else
+#define ixgbe_rx_pg_order(_ring) 0
+#endif
+#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
+#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring))
+
+#endif
+struct ixgbe_ring_container {
+	struct ixgbe_ring *ring;	/* pointer to linked list of rings */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 work_limit;			/* total work allowed per interrupt */
+	u8 count;			/* total number of rings in vector */
+	u8 itr;				/* current ITR setting for ring */
+};
+
+/* iterator for handling rings in ring container */
+#define ixgbe_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+#define MAX_RX_PACKET_BUFFERS	((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+				 ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS	MAX_RX_PACKET_BUFFERS
+
+/* MAX_MSIX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct ixgbe_q_vector {
+	struct ixgbe_adapter *adapter;
+	int cpu;	/* CPU for DCA */
+	u16 v_idx;	/* index of q_vector within array, also used for
+			 * finding the bit in EICR and friends that
+			 * represents the vector for this ring */
+	u16 itr;	/* Interrupt throttle rate written to EITR */
+	struct ixgbe_ring_container rx, tx;
+
+#ifdef CONFIG_IXGBE_NAPI
+	struct napi_struct napi;
+#endif
+#ifndef HAVE_NETDEV_NAPI_LIST
+	struct net_device poll_dev;
+#endif
+#ifdef HAVE_IRQ_AFFINITY_HINT
+	cpumask_t affinity_mask;
+#endif
+#ifndef IXGBE_NO_LRO
+	struct ixgbe_lro_list lrolist;   /* LRO list for queue vector*/
+#endif
+	int numa_node;
+	char name[IFNAMSIZ + 9];
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+/*
+ * microsecond values for various ITR rates shifted by 2 to fit itr register
+ * with the first 3 bits reserved 0
+ */
+#define IXGBE_MIN_RSC_ITR	24
+#define IXGBE_100K_ITR		40
+#define IXGBE_20K_ITR		200
+#define IXGBE_16K_ITR		248
+#define IXGBE_10K_ITR		400
+#define IXGBE_8K_ITR		500
+
+/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */
+static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc,
+					const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* ixgbe_desc_unused - calculate if we have unused descriptors */
+static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#define IXGBE_RX_DESC(R, i)	\
+	(&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
+#define IXGBE_TX_DESC(R, i)	\
+	(&(((union ixgbe_adv_tx_desc *)((R)->desc))[i]))
+#define IXGBE_TX_CTXTDESC(R, i)	\
+	(&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i]))
+
+#define IXGBE_MAX_JUMBO_FRAME_SIZE	16128
+#ifdef IXGBE_FCOE
+/* use 3K as the baby jumbo frame size for FCoE */
+#define IXGBE_FCOE_JUMBO_FRAME_SIZE	3072
+#endif /* IXGBE_FCOE */
+
+#define TCP_TIMER_VECTOR	0
+#define OTHER_VECTOR	1
+#define NON_Q_VECTORS	(OTHER_VECTOR + TCP_TIMER_VECTOR)
+
+#define IXGBE_MAX_MSIX_Q_VECTORS_82599	64
+#define IXGBE_MAX_MSIX_Q_VECTORS_82598	16
+
+struct ixgbe_mac_addr {
+	u8 addr[ETH_ALEN];
+	u16 queue;
+	u16 state; /* bitmask */
+};
+#define IXGBE_MAC_STATE_DEFAULT		0x1
+#define IXGBE_MAC_STATE_MODIFIED	0x2
+#define IXGBE_MAC_STATE_IN_USE		0x4
+
+#ifdef IXGBE_PROCFS
+struct ixgbe_therm_proc_data {
+	struct ixgbe_hw *hw;
+	struct ixgbe_thermal_diode_data *sensor_data;
+};
+
+#endif /* IXGBE_PROCFS */
+
+/*
+ * Only for array allocations in our adapter struct.  On 82598, there will be
+ * unused entries in the array, but that's not a big deal.  Also, in 82599,
+ * we can actually assign 64 queue vectors based on our extended-extended
+ * interrupt registers.  This is different than 82598, which is limited to 16.
+ */
+#define MAX_MSIX_Q_VECTORS	IXGBE_MAX_MSIX_Q_VECTORS_82599
+#define MAX_MSIX_COUNT		IXGBE_MAX_MSIX_VECTORS_82599
+
+#define MIN_MSIX_Q_VECTORS	1
+#define MIN_MSIX_COUNT		(MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+
+/* default to trying for four seconds */
+#define IXGBE_TRY_LINK_TIMEOUT	(4 * HZ)
+
+/* board specific private data structure */
+struct ixgbe_adapter {
+#ifdef NETIF_F_HW_VLAN_TX
+#ifdef HAVE_VLAN_RX_REGISTER
+	struct vlan_group *vlgrp; /* must be first, see ixgbe_receive_skb */
+#else
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+#endif
+#endif /* NETIF_F_HW_VLAN_TX */
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	unsigned long state;
+
+	/* Some features need tri-state capability,
+	 * thus the additional *_CAPABLE flags.
+	 */
+	u32 flags;
+#define IXGBE_FLAG_MSI_CAPABLE			(u32)(1 << 0)
+#define IXGBE_FLAG_MSI_ENABLED			(u32)(1 << 1)
+#define IXGBE_FLAG_MSIX_CAPABLE			(u32)(1 << 2)
+#define IXGBE_FLAG_MSIX_ENABLED			(u32)(1 << 3)
+#ifndef IXGBE_NO_LLI
+#define IXGBE_FLAG_LLI_PUSH			(u32)(1 << 4)
+#endif
+#define IXGBE_FLAG_IN_NETPOLL                   (u32)(1 << 8)
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+#define IXGBE_FLAG_DCA_ENABLED			(u32)(1 << 9)
+#define IXGBE_FLAG_DCA_CAPABLE			(u32)(1 << 10)
+#define IXGBE_FLAG_DCA_ENABLED_DATA		(u32)(1 << 11)
+#else
+#define IXGBE_FLAG_DCA_ENABLED			(u32)0
+#define IXGBE_FLAG_DCA_CAPABLE			(u32)0
+#define IXGBE_FLAG_DCA_ENABLED_DATA             (u32)0
+#endif
+#define IXGBE_FLAG_MQ_CAPABLE			(u32)(1 << 12)
+#define IXGBE_FLAG_DCB_ENABLED			(u32)(1 << 13)
+#define IXGBE_FLAG_DCB_CAPABLE			(u32)(1 << 14)
+#define IXGBE_FLAG_RSS_ENABLED			(u32)(1 << 15)
+#define IXGBE_FLAG_RSS_CAPABLE			(u32)(1 << 16)
+#define IXGBE_FLAG_VMDQ_ENABLED			(u32)(1 << 18)
+#define IXGBE_FLAG_FAN_FAIL_CAPABLE		(u32)(1 << 19)
+#define IXGBE_FLAG_NEED_LINK_UPDATE		(u32)(1 << 20)
+#define IXGBE_FLAG_NEED_LINK_CONFIG		(u32)(1 << 21)
+#define IXGBE_FLAG_FDIR_HASH_CAPABLE		(u32)(1 << 22)
+#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE		(u32)(1 << 23)
+#ifdef IXGBE_FCOE
+#define IXGBE_FLAG_FCOE_CAPABLE			(u32)(1 << 24)
+#define IXGBE_FLAG_FCOE_ENABLED			(u32)(1 << 25)
+#endif /* IXGBE_FCOE */
+#define IXGBE_FLAG_SRIOV_CAPABLE		(u32)(1 << 26)
+#define IXGBE_FLAG_SRIOV_ENABLED		(u32)(1 << 27)
+#define IXGBE_FLAG_SRIOV_REPLICATION_ENABLE	(u32)(1 << 28)
+#define IXGBE_FLAG_SRIOV_L2SWITCH_ENABLE	(u32)(1 << 29)
+#define IXGBE_FLAG_SRIOV_L2LOOPBACK_ENABLE	(u32)(1 << 30)
+#define IXGBE_FLAG_RX_BB_CAPABLE		(u32)(1 << 31)
+
+	u32 flags2;
+#ifndef IXGBE_NO_HW_RSC
+#define IXGBE_FLAG2_RSC_CAPABLE			(u32)(1)
+#define IXGBE_FLAG2_RSC_ENABLED			(u32)(1 << 1)
+#else
+#define IXGBE_FLAG2_RSC_CAPABLE			0
+#define IXGBE_FLAG2_RSC_ENABLED			0
+#endif
+#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE	(u32)(1 << 2)
+#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE		(u32)(1 << 4)
+#define IXGBE_FLAG2_TEMP_SENSOR_EVENT		(u32)(1 << 5)
+#define IXGBE_FLAG2_SEARCH_FOR_SFP		(u32)(1 << 6)
+#define IXGBE_FLAG2_SFP_NEEDS_RESET		(u32)(1 << 7)
+#define IXGBE_FLAG2_RESET_REQUESTED		(u32)(1 << 8)
+#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT	(u32)(1 << 9)
+#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP		(u32)(1 << 10)
+#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP		(u32)(1 << 11)
+#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED      (u32)(1 << 12)
+
+	/* Tx fast path data */
+	int num_tx_queues;
+	u16 tx_itr_setting;
+	u16 tx_work_limit;
+
+	/* Rx fast path data */
+	int num_rx_queues;
+	u16 rx_itr_setting;
+	u16 rx_work_limit;
+
+	/* TX */
+	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
+
+	u64 restart_queue;
+	u64 lsc_int;
+	u32 tx_timeout_count;
+
+	/* RX */
+	struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];
+	int num_rx_pools;		/* == num_rx_queues in 82598 */
+	int num_rx_queues_per_pool;	/* 1 if 82598, can be many if 82599 */
+	u64 hw_csum_rx_error;
+	u64 hw_rx_no_dma_resources;
+	u64 rsc_total_count;
+	u64 rsc_total_flush;
+	u64 non_eop_descs;
+#ifndef CONFIG_IXGBE_NAPI
+	u64 rx_dropped_backlog;		/* count drops from rx intr handler */
+#endif
+	u32 alloc_rx_page_failed;
+	u32 alloc_rx_buff_failed;
+
+	struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
+
+#ifdef HAVE_DCBNL_IEEE
+	struct ieee_pfc *ixgbe_ieee_pfc;
+	struct ieee_ets *ixgbe_ieee_ets;
+#endif
+	struct ixgbe_dcb_config dcb_cfg;
+	struct ixgbe_dcb_config temp_dcb_cfg;
+	u8 dcb_set_bitmap;
+	u8 dcbx_cap;
+#ifndef HAVE_MQPRIO
+	u8 tc;
+#endif
+	enum ixgbe_fc_mode last_lfc_mode;
+
+	int num_msix_vectors;
+	int max_msix_q_vectors;         /* true count of q_vectors for device */
+	struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
+	struct msix_entry *msix_entries;
+
+#ifndef HAVE_NETDEV_STATS_IN_NETDEV
+	struct net_device_stats net_stats;
+#endif
+#ifndef IXGBE_NO_LRO
+	struct ixgbe_lro_stats lro_stats;
+#endif
+
+#ifdef ETHTOOL_TEST
+	u32 test_icr;
+	struct ixgbe_ring test_tx_ring;
+	struct ixgbe_ring test_rx_ring;
+#endif
+
+	/* structs defined in ixgbe_hw.h */
+	struct ixgbe_hw hw;
+	u16 msg_enable;
+	struct ixgbe_hw_stats stats;
+#ifndef IXGBE_NO_LLI
+	u32 lli_port;
+	u32 lli_size;
+	u32 lli_etype;
+	u32 lli_vlan_pri;
+#endif /* IXGBE_NO_LLI */
+
+	u32 *config_space;
+	u64 tx_busy;
+	unsigned int tx_ring_count;
+	unsigned int rx_ring_count;
+
+	u32 link_speed;
+	bool link_up;
+	unsigned long link_check_timeout;
+
+	struct timer_list service_timer;
+	struct work_struct service_task;
+
+	struct hlist_head fdir_filter_list;
+	unsigned long fdir_overflow; /* number of times ATR was backed off */
+	union ixgbe_atr_input fdir_mask;
+	int fdir_filter_count;
+	u32 fdir_pballoc;
+	u32 atr_sample_rate;
+	spinlock_t fdir_perfect_lock;
+
+#ifdef IXGBE_FCOE
+	struct ixgbe_fcoe fcoe;
+#endif /* IXGBE_FCOE */
+	u32 wol;
+
+	u16 bd_number;
+
+	char eeprom_id[32];
+	u16 eeprom_cap;
+	bool netdev_registered;
+	u32 interrupt_event;
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+	u32 led_reg;
+#endif
+
+	DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
+	unsigned int num_vfs;
+	struct vf_data_storage *vfinfo;
+	int vf_rate_link_speed;
+	struct vf_macvlans vf_mvs;
+	struct vf_macvlans *mv_list;
+#ifdef CONFIG_PCI_IOV
+	u32 timer_event_accumulator;
+	u32 vferr_refcount;
+#endif
+	struct ixgbe_mac_addr *mac_table;
+#ifdef IXGBE_SYSFS
+	struct kobject *info_kobj;
+	struct kobject *therm_kobj[IXGBE_MAX_SENSORS];
+#else /* IXGBE_SYSFS */
+#ifdef IXGBE_PROCFS
+	struct proc_dir_entry *eth_dir;
+	struct proc_dir_entry *info_dir;
+	struct proc_dir_entry *therm_dir[IXGBE_MAX_SENSORS];
+	struct ixgbe_therm_proc_data therm_data[IXGBE_MAX_SENSORS];
+#endif /* IXGBE_PROCFS */
+#endif /* IXGBE_SYSFS */
+};
+
+struct ixgbe_fdir_filter {
+	struct  hlist_node fdir_node;
+	union ixgbe_atr_input filter;
+	u16 sw_idx;
+	u16 action;
+};
+
+enum ixgbe_state_t {
+	__IXGBE_TESTING,
+	__IXGBE_RESETTING,
+	__IXGBE_DOWN,
+	__IXGBE_SERVICE_SCHED,
+	__IXGBE_IN_SFP_INIT,
+};
+
+struct ixgbe_cb {
+#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+	union {				/* Union defining head/tail partner */
+		struct sk_buff *head;
+		struct sk_buff *tail;
+	};
+#endif
+	dma_addr_t dma;
+#ifndef IXGBE_NO_LRO
+	__be32	tsecr;			/* timestamp echo response */
+	u32	tsval;			/* timestamp value in host order */
+	u32	next_seq;		/* next expected sequence number */
+	u16	free;			/* 65521 minus total size */
+	u16	mss;			/* size of data portion of packet */
+#endif /* IXGBE_NO_LRO */
+#ifdef HAVE_VLAN_RX_REGISTER
+	u16	vid;			/* VLAN tag */
+#endif
+	u16	append_cnt;		/* number of skb's appended */
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+	bool	page_released;
+#endif
+};
+#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb)
+
+#ifdef IXGBE_SYSFS
+void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter);
+int ixgbe_sysfs_init(struct ixgbe_adapter *adapter);
+#endif /* IXGBE_SYSFS */
+#ifdef IXGBE_PROCFS
+void ixgbe_procfs_exit(struct ixgbe_adapter *adapter);
+int ixgbe_procfs_init(struct ixgbe_adapter *adapter);
+int ixgbe_procfs_topdir_init(void);
+void ixgbe_procfs_topdir_exit(void);
+#endif /* IXGBE_PROCFS */
+
+extern struct dcbnl_rtnl_ops dcbnl_ops;
+extern int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max);
+
+extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index);
+
+/* needed by ixgbe_main.c */
+extern int ixgbe_validate_mac_addr(u8 *mc_addr);
+extern void ixgbe_check_options(struct ixgbe_adapter *adapter);
+extern void ixgbe_assign_netdev_ops(struct net_device *netdev);
+
+/* needed by ixgbe_ethtool.c */
+extern char ixgbe_driver_name[];
+extern const char ixgbe_driver_version[];
+
+extern void ixgbe_up(struct ixgbe_adapter *adapter);
+extern void ixgbe_down(struct ixgbe_adapter *adapter);
+extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
+extern void ixgbe_reset(struct ixgbe_adapter *adapter);
+extern void ixgbe_set_ethtool_ops(struct net_device *netdev);
+extern int ixgbe_setup_rx_resources(struct ixgbe_ring *);
+extern int ixgbe_setup_tx_resources(struct ixgbe_ring *);
+extern void ixgbe_free_rx_resources(struct ixgbe_ring *);
+extern void ixgbe_free_tx_resources(struct ixgbe_ring *);
+extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *,
+				    struct ixgbe_ring *);
+extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *,
+				    struct ixgbe_ring *);
+extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
+extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
+extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
+extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev);
+extern netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *,
+					 struct ixgbe_adapter *,
+					 struct ixgbe_ring *);
+extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *,
+					     struct ixgbe_tx_buffer *);
+extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
+extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
+				   struct ixgbe_ring *);
+extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter,
+			       struct ixgbe_ring *);
+extern void ixgbe_set_rx_mode(struct net_device *netdev);
+extern int ixgbe_write_mc_addr_list(struct net_device *netdev);
+extern int ixgbe_setup_tc(struct net_device *dev, u8 tc);
+#ifdef IXGBE_FCOE
+extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32);
+#endif /* IXGBE_FCOE */
+extern void ixgbe_do_reset(struct net_device *netdev);
+extern void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector);
+extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter,
+				   struct ixgbe_ring *);
+extern void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter);
+extern void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter);
+#ifdef ETHTOOL_OPS_COMPAT
+extern int ethtool_ioctl(struct ifreq *ifr);
+#endif
+
+#ifdef IXGBE_FCOE
+extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter);
+extern int ixgbe_fso(struct ixgbe_ring *tx_ring,
+		     struct ixgbe_tx_buffer *first,
+		     u8 *hdr_len);
+extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter);
+extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
+			  union ixgbe_adv_rx_desc *rx_desc,
+			  struct sk_buff *skb);
+extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
+			      struct scatterlist *sgl, unsigned int sgc);
+#ifdef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid,
+				 struct scatterlist *sgl, unsigned int sgc);
+#endif /* HAVE_NETDEV_OPS_FCOE_DDP_TARGET */
+extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid);
+#ifdef HAVE_NETDEV_OPS_FCOE_ENABLE
+extern int ixgbe_fcoe_enable(struct net_device *netdev);
+extern int ixgbe_fcoe_disable(struct net_device *netdev);
+#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */
+#ifdef CONFIG_DCB
+#ifdef HAVE_DCBNL_OPS_GETAPP
+extern u8 ixgbe_fcoe_getapp(struct net_device *netdev);
+#endif /* HAVE_DCBNL_OPS_GETAPP */
+extern u8 ixgbe_fcoe_setapp(struct ixgbe_adapter *adapter, u8 up);
+#endif /* CONFIG_DCB */
+#ifdef HAVE_NETDEV_OPS_FCOE_GETWWN
+extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type);
+#endif
+#endif /* IXGBE_FCOE */
+
+#ifdef CONFIG_DCB
+#ifdef HAVE_DCBNL_IEEE
+s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame);
+#endif /* HAVE_DCBNL_IEEE */
+#endif /* CONFIG_DCB */
+
+extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring);
+extern int ixgbe_get_settings(struct net_device *netdev,
+			      struct ethtool_cmd *ecmd);
+extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
+			    struct net_device *netdev, unsigned int vfn);
+extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
+extern int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+				u8 *addr, u16 queue);
+extern int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+				u8 *addr, u16 queue);
+extern int ixgbe_available_rars(struct ixgbe_adapter *adapter);
+#ifndef HAVE_VLAN_RX_REGISTER
+extern void ixgbe_vlan_mode(struct net_device *, u32);
+#endif
+#ifndef ixgbe_get_netdev_tc_txq
+#define ixgbe_get_netdev_tc_txq(dev, tc) (&dev->tc_to_txq[tc])
+#endif
+extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter);
+#endif /* _IXGBE_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
new file mode 100644
index 00000000..24015844
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
@@ -0,0 +1,1296 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_type.h"
+#include "ixgbe_82598.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
+					     ixgbe_link_speed *speed,
+					     bool *autoneg);
+static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw);
+static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
+				      bool autoneg_wait_to_complete);
+static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
+				      ixgbe_link_speed *speed, bool *link_up,
+				      bool link_up_wait_to_complete);
+static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
+				      ixgbe_link_speed speed,
+				      bool autoneg,
+				      bool autoneg_wait_to_complete);
+static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg,
+					 bool autoneg_wait_to_complete);
+static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw);
+static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw);
+static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
+				  u32 headroom, int strategy);
+
+/**
+ *  ixgbe_set_pcie_completion_timeout - set pci-e completion timeout
+ *  @hw: pointer to the HW structure
+ *
+ *  The defaults for 82598 should be in the range of 50us to 50ms,
+ *  however the hardware default for these parts is 500us to 1ms which is less
+ *  than the 10ms recommended by the pci-e spec.  To address this we need to
+ *  increase the value to either 10ms to 250ms for capability version 1 config,
+ *  or 16ms to 55ms for version 2.
+ **/
+void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw)
+{
+	u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR);
+	u16 pcie_devctl2;
+
+	/* only take action if timeout value is defaulted to 0 */
+	if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK)
+		goto out;
+
+	/*
+	 * if capababilities version is type 1 we can write the
+	 * timeout of 10ms to 250ms through the GCR register
+	 */
+	if (!(gcr & IXGBE_GCR_CAP_VER2)) {
+		gcr |= IXGBE_GCR_CMPL_TMOUT_10ms;
+		goto out;
+	}
+
+	/*
+	 * for version 2 capabilities we need to write the config space
+	 * directly in order to set the completion timeout value for
+	 * 16ms to 55ms
+	 */
+	pcie_devctl2 = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2);
+	pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms;
+	IXGBE_WRITE_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2);
+out:
+	/* disable completion timeout resend */
+	gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND;
+	IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr);
+}
+
+/**
+ *  ixgbe_init_ops_82598 - Inits func ptrs and MAC type
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the function pointers and assign the MAC type for 82598.
+ *  Does not touch the hardware.
+ **/
+s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	ret_val = ixgbe_init_phy_ops_generic(hw);
+	ret_val = ixgbe_init_ops_generic(hw);
+
+	/* PHY */
+	phy->ops.init = &ixgbe_init_phy_ops_82598;
+
+	/* MAC */
+	mac->ops.start_hw = &ixgbe_start_hw_82598;
+	mac->ops.reset_hw = &ixgbe_reset_hw_82598;
+	mac->ops.get_media_type = &ixgbe_get_media_type_82598;
+	mac->ops.get_supported_physical_layer =
+				&ixgbe_get_supported_physical_layer_82598;
+	mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598;
+	mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598;
+	mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598;
+
+	/* RAR, Multicast, VLAN */
+	mac->ops.set_vmdq = &ixgbe_set_vmdq_82598;
+	mac->ops.clear_vmdq = &ixgbe_clear_vmdq_82598;
+	mac->ops.set_vfta = &ixgbe_set_vfta_82598;
+	mac->ops.set_vlvf = NULL;
+	mac->ops.clear_vfta = &ixgbe_clear_vfta_82598;
+
+	/* Flow Control */
+	mac->ops.fc_enable = &ixgbe_fc_enable_82598;
+
+	mac->mcft_size		= 128;
+	mac->vft_size		= 128;
+	mac->num_rar_entries	= 16;
+	mac->rx_pb_size		= 512;
+	mac->max_tx_queues	= 32;
+	mac->max_rx_queues	= 64;
+	mac->max_msix_vectors	= ixgbe_get_pcie_msix_count_generic(hw);
+
+	/* SFP+ Module */
+	phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598;
+
+	/* Link */
+	mac->ops.check_link = &ixgbe_check_mac_link_82598;
+	mac->ops.setup_link = &ixgbe_setup_mac_link_82598;
+	mac->ops.flap_tx_laser = NULL;
+	mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82598;
+	mac->ops.setup_rxpba = &ixgbe_set_rxpba_82598;
+
+	/* Manageability interface */
+	mac->ops.set_fw_drv_ver = NULL;
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_init_phy_ops_82598 - PHY/SFP specific init
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize any function pointers that were not able to be
+ *  set during init_shared_code because the PHY/SFP type was
+ *  not known.  Perform the SFP init if necessary.
+ *
+ **/
+s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 list_offset, data_offset;
+
+	/* Identify the PHY */
+	phy->ops.identify(hw);
+
+	/* Overwrite the link function pointers if copper PHY */
+	if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
+		mac->ops.setup_link = &ixgbe_setup_copper_link_82598;
+		mac->ops.get_link_capabilities =
+				&ixgbe_get_copper_link_capabilities_generic;
+	}
+
+	switch (hw->phy.type) {
+	case ixgbe_phy_tn:
+		phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
+		phy->ops.check_link = &ixgbe_check_phy_link_tnx;
+		phy->ops.get_firmware_version =
+					&ixgbe_get_phy_firmware_version_tnx;
+		break;
+	case ixgbe_phy_nl:
+		phy->ops.reset = &ixgbe_reset_phy_nl;
+
+		/* Call SFP+ identify routine to get the SFP+ module type */
+		ret_val = phy->ops.identify_sfp(hw);
+		if (ret_val != 0)
+			goto out;
+		else if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) {
+			ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
+			goto out;
+		}
+
+		/* Check to see if SFP+ module is supported */
+		ret_val = ixgbe_get_sfp_init_sequence_offsets(hw,
+							      &list_offset,
+							      &data_offset);
+		if (ret_val != 0) {
+			ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED;
+			goto out;
+		}
+		break;
+	default:
+		break;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function.
+ *  Disables relaxed ordering Then set pcie completion timeout
+ *
+ **/
+s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
+{
+	u32 regval;
+	u32 i;
+	s32 ret_val = 0;
+
+	ret_val = ixgbe_start_hw_generic(hw);
+
+	/* Disable relaxed ordering */
+	for (i = 0; ((i < hw->mac.max_tx_queues) &&
+	     (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+		regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+		regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+		IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval);
+	}
+
+	for (i = 0; ((i < hw->mac.max_rx_queues) &&
+	     (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) {
+		regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+		regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
+			    IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
+		IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+	}
+
+	/* set the completion timeout for interface */
+	if (ret_val == 0)
+		ixgbe_set_pcie_completion_timeout(hw);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_link_capabilities_82598 - Determines link capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @autoneg: boolean auto-negotiation value
+ *
+ *  Determines the link capabilities by reading the AUTOC register.
+ **/
+static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
+					     ixgbe_link_speed *speed,
+					     bool *autoneg)
+{
+	s32 status = 0;
+	u32 autoc = 0;
+
+	/*
+	 * Determine link capabilities based on the stored value of AUTOC,
+	 * which represents EEPROM defaults.  If AUTOC value has not been
+	 * stored, use the current register value.
+	 */
+	if (hw->mac.orig_link_settings_stored)
+		autoc = hw->mac.orig_autoc;
+	else
+		autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+	switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+	case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_1G_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_KX4_AN:
+	case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	default:
+		status = IXGBE_ERR_LINK_SETUP;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_get_media_type_82598 - Determines media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ **/
+static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw)
+{
+	enum ixgbe_media_type media_type;
+
+	/* Detect if there is a copper PHY attached. */
+	switch (hw->phy.type) {
+	case ixgbe_phy_cu_unknown:
+	case ixgbe_phy_tn:
+		media_type = ixgbe_media_type_copper;
+		goto out;
+	default:
+		break;
+	}
+
+	/* Media type for I82598 is based on device ID */
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82598:
+	case IXGBE_DEV_ID_82598_BX:
+		/* Default device ID is mezzanine card KX/KX4 */
+		media_type = ixgbe_media_type_backplane;
+		break;
+	case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+	case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+	case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+	case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+	case IXGBE_DEV_ID_82598EB_XF_LR:
+	case IXGBE_DEV_ID_82598EB_SFP_LOM:
+		media_type = ixgbe_media_type_fiber;
+		break;
+	case IXGBE_DEV_ID_82598EB_CX4:
+	case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
+		media_type = ixgbe_media_type_cx4;
+		break;
+	case IXGBE_DEV_ID_82598AT:
+	case IXGBE_DEV_ID_82598AT2:
+		media_type = ixgbe_media_type_copper;
+		break;
+	default:
+		media_type = ixgbe_media_type_unknown;
+		break;
+	}
+out:
+	return media_type;
+}
+
+/**
+ *  ixgbe_fc_enable_82598 - Enable flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to the current settings.
+ **/
+s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 fctrl_reg;
+	u32 rmcs_reg;
+	u32 reg;
+	u32 fcrtl, fcrth;
+	u32 link_speed = 0;
+	int i;
+	bool link_up;
+
+	/* Validate the water mark configuration */
+	if (!hw->fc.pause_time) {
+		ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+		goto out;
+	}
+
+	/* Low water mark of zero causes XOFF floods */
+	for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			if (!hw->fc.low_water[i] ||
+			    hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+				hw_dbg(hw, "Invalid water mark configuration\n");
+				ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * On 82598 having Rx FC on causes resets while doing 1G
+	 * so if it's on turn it off once we know link_speed. For
+	 * more details see 82598 Specification update.
+	 */
+	hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+	if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) {
+		switch (hw->fc.requested_mode) {
+		case ixgbe_fc_full:
+			hw->fc.requested_mode = ixgbe_fc_tx_pause;
+			break;
+		case ixgbe_fc_rx_pause:
+			hw->fc.requested_mode = ixgbe_fc_none;
+			break;
+		default:
+			/* no change */
+			break;
+		}
+	}
+
+	/* Negotiate the fc mode to use */
+	ixgbe_fc_autoneg(hw);
+
+	/* Disable any previous flow control settings */
+	fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE);
+
+	rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X);
+
+	/*
+	 * The possible values of fc.current_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *     we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.current_mode) {
+	case ixgbe_fc_none:
+		/*
+		 * Flow control is disabled by software override or autoneg.
+		 * The code below will actually disable it in the HW.
+		 */
+		break;
+	case ixgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+		fctrl_reg |= IXGBE_FCTRL_RFCE;
+		break;
+	case ixgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
+		break;
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		fctrl_reg |= IXGBE_FCTRL_RFCE;
+		rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
+		break;
+	default:
+		hw_dbg(hw, "Flow control param set incorrectly\n");
+		ret_val = IXGBE_ERR_CONFIG;
+		goto out;
+		break;
+	}
+
+	/* Set 802.3x based flow control settings. */
+	fctrl_reg |= IXGBE_FCTRL_DPF;
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg);
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg);
+
+	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
+	for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+			fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth);
+		} else {
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0);
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0);
+		}
+
+	}
+
+	/* Configure pause time (2 TCs per register) */
+	reg = hw->fc.pause_time * 0x00010001;
+	for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_start_mac_link_82598 - Configures MAC link settings
+ *  @hw: pointer to hardware structure
+ *
+ *  Configures link settings based on values in the ixgbe_hw struct.
+ *  Restarts the link.  Performs autonegotiation if needed.
+ **/
+static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
+				      bool autoneg_wait_to_complete)
+{
+	u32 autoc_reg;
+	u32 links_reg;
+	u32 i;
+	s32 status = 0;
+
+	/* Restart link */
+	autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+	/* Only poll for autoneg to complete if specified to do so */
+	if (autoneg_wait_to_complete) {
+		if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_AN ||
+		    (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+			links_reg = 0; /* Just in case Autoneg time = 0 */
+			for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+				links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+				if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+					break;
+				msleep(100);
+			}
+			if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+				status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+				hw_dbg(hw, "Autonegotiation did not complete.\n");
+			}
+		}
+	}
+
+	/* Add delay to filter out noises during initial link setup */
+	msleep(50);
+
+	return status;
+}
+
+/**
+ *  ixgbe_validate_link_ready - Function looks for phy link
+ *  @hw: pointer to hardware structure
+ *
+ *  Function indicates success when phy link is available. If phy is not ready
+ *  within 5 seconds of MAC indicating link, the function returns error.
+ **/
+static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw)
+{
+	u32 timeout;
+	u16 an_reg;
+
+	if (hw->device_id != IXGBE_DEV_ID_82598AT2)
+		return 0;
+
+	for (timeout = 0;
+	     timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) {
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &an_reg);
+
+		if ((an_reg & IXGBE_MII_AUTONEG_COMPLETE) &&
+		    (an_reg & IXGBE_MII_AUTONEG_LINK_UP))
+			break;
+
+		msleep(100);
+	}
+
+	if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) {
+		hw_dbg(hw, "Link was indicated but link is down\n");
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_check_mac_link_82598 - Get link/speed status
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @link_up: true is link is up, false otherwise
+ *  @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ *  Reads the links register to determine if link is up and the current speed
+ **/
+static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
+				      ixgbe_link_speed *speed, bool *link_up,
+				      bool link_up_wait_to_complete)
+{
+	u32 links_reg;
+	u32 i;
+	u16 link_reg, adapt_comp_reg;
+
+	/*
+	 * SERDES PHY requires us to read link status from undocumented
+	 * register 0xC79F.  Bit 0 set indicates link is up/ready; clear
+	 * indicates link down.  OxC00C is read to check that the XAUI lanes
+	 * are active.  Bit 0 clear indicates active; set indicates inactive.
+	 */
+	if (hw->phy.type == ixgbe_phy_nl) {
+		hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg);
+		hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg);
+		hw->phy.ops.read_reg(hw, 0xC00C, IXGBE_TWINAX_DEV,
+				     &adapt_comp_reg);
+		if (link_up_wait_to_complete) {
+			for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+				if ((link_reg & 1) &&
+				    ((adapt_comp_reg & 1) == 0)) {
+					*link_up = true;
+					break;
+				} else {
+					*link_up = false;
+				}
+				msleep(100);
+				hw->phy.ops.read_reg(hw, 0xC79F,
+						     IXGBE_TWINAX_DEV,
+						     &link_reg);
+				hw->phy.ops.read_reg(hw, 0xC00C,
+						     IXGBE_TWINAX_DEV,
+						     &adapt_comp_reg);
+			}
+		} else {
+			if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0))
+				*link_up = true;
+			else
+				*link_up = false;
+		}
+
+		if (*link_up == false)
+			goto out;
+	}
+
+	links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+	if (link_up_wait_to_complete) {
+		for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+			if (links_reg & IXGBE_LINKS_UP) {
+				*link_up = true;
+				break;
+			} else {
+				*link_up = false;
+			}
+			msleep(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+		}
+	} else {
+		if (links_reg & IXGBE_LINKS_UP)
+			*link_up = true;
+		else
+			*link_up = false;
+	}
+
+	if (links_reg & IXGBE_LINKS_SPEED)
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+	else
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+	if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && (*link_up == true) &&
+	    (ixgbe_validate_link_ready(hw) != 0))
+		*link_up = false;
+
+out:
+	return 0;
+}
+
+/**
+ *  ixgbe_setup_mac_link_82598 - Set MAC link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Set the link speed in the AUTOC register and restarts link.
+ **/
+static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
+				      ixgbe_link_speed speed, bool autoneg,
+				      bool autoneg_wait_to_complete)
+{
+	s32 status = 0;
+	ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+	u32 curr_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 autoc = curr_autoc;
+	u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
+
+	/* Check to see if speed passed in is supported. */
+	ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg);
+	speed &= link_capabilities;
+
+	if (speed == IXGBE_LINK_SPEED_UNKNOWN)
+		status = IXGBE_ERR_LINK_SETUP;
+
+	/* Set KX4/KX support according to speed requested */
+	else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN ||
+		 link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+		autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK;
+		if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+			autoc |= IXGBE_AUTOC_KX4_SUPP;
+		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+			autoc |= IXGBE_AUTOC_KX_SUPP;
+		if (autoc != curr_autoc)
+			IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
+	}
+
+	if (status == 0) {
+		/*
+		 * Setup and restart the link based on the new values in
+		 * ixgbe_hw This will write the AUTOC register based on the new
+		 * stored values
+		 */
+		status = ixgbe_start_mac_link_82598(hw,
+						    autoneg_wait_to_complete);
+	}
+
+	return status;
+}
+
+
+/**
+ *  ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true if waiting is needed to complete
+ *
+ *  Sets the link speed in the AUTOC register in the MAC and restarts link.
+ **/
+static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg,
+					 bool autoneg_wait_to_complete)
+{
+	s32 status;
+
+	/* Setup the PHY according to input speed */
+	status = hw->phy.ops.setup_link_speed(hw, speed, autoneg,
+					      autoneg_wait_to_complete);
+	/* Set up MAC */
+	ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete);
+
+	return status;
+}
+
+/**
+ *  ixgbe_reset_hw_82598 - Performs hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks and
+ *  clears all interrupts, performing a PHY reset, and performing a link (MAC)
+ *  reset.
+ **/
+static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	s32 phy_status = 0;
+	u32 ctrl;
+	u32 gheccr;
+	u32 i;
+	u32 autoc;
+	u8  analog_val;
+
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status != 0)
+		goto reset_hw_out;
+
+	/*
+	 * Power up the Atlas Tx lanes if they are currently powered down.
+	 * Atlas Tx lanes are powered down for MAC loopback tests, but
+	 * they are not automatically restored on reset.
+	 */
+	hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val);
+	if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) {
+		/* Enable Tx Atlas so packets can be transmitted again */
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
+					      analog_val);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
+					      analog_val);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
+					      analog_val);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
+					      analog_val);
+	}
+
+	/* Reset PHY */
+	if (hw->phy.reset_disable == false) {
+		/* PHY ops must be identified and initialized prior to reset */
+
+		/* Init PHY and function pointers, perform SFP setup */
+		phy_status = hw->phy.ops.init(hw);
+		if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+			goto reset_hw_out;
+		if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT)
+			goto mac_reset_top;
+
+		hw->phy.ops.reset(hw);
+	}
+
+mac_reset_top:
+	/*
+	 * Issue global reset to the MAC.  This needs to be a SW reset.
+	 * If link reset is used, it might reset the MAC when mng is using it
+	 */
+	ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST;
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Poll for reset bit to self-clear indicating reset is complete */
+	for (i = 0; i < 10; i++) {
+		udelay(1);
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST))
+			break;
+	}
+	if (ctrl & IXGBE_CTRL_RST) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+
+	msleep(50);
+
+	/*
+	 * Double resets are required for recovery from certain error
+	 * conditions.  Between resets, it is necessary to stall to allow time
+	 * for any pending HW events to complete.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR);
+	gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6));
+	IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr);
+
+	/*
+	 * Store the original AUTOC value if it has not been
+	 * stored off yet.  Otherwise restore the stored original
+	 * AUTOC value since the reset operation sets back to deaults.
+	 */
+	autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	if (hw->mac.orig_link_settings_stored == false) {
+		hw->mac.orig_autoc = autoc;
+		hw->mac.orig_link_settings_stored = true;
+	} else if (autoc != hw->mac.orig_autoc) {
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc);
+	}
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/*
+	 * Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table
+	 */
+	hw->mac.ops.init_rx_addrs(hw);
+
+reset_hw_out:
+	if (phy_status != 0)
+		status = phy_status;
+
+	return status;
+}
+
+/**
+ *  ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to associate with a VMDq index
+ *  @vmdq: VMDq set index
+ **/
+s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+	rar_high &= ~IXGBE_RAH_VIND_MASK;
+	rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK);
+	IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to associate with a VMDq index
+ *  @vmdq: VMDq clear index (not used in 82598, but elsewhere)
+ **/
+static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+	if (rar_high & IXGBE_RAH_VIND_MASK) {
+		rar_high &= ~IXGBE_RAH_VIND_MASK;
+		IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_vfta_82598 - Set VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFTA
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFTA
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+			 bool vlan_on)
+{
+	u32 regindex;
+	u32 bitindex;
+	u32 bits;
+	u32 vftabyte;
+
+	if (vlan > 4095)
+		return IXGBE_ERR_PARAM;
+
+	/* Determine 32-bit word position in array */
+	regindex = (vlan >> 5) & 0x7F;   /* upper seven bits */
+
+	/* Determine the location of the (VMD) queue index */
+	vftabyte =  ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */
+	bitindex = (vlan & 0x7) << 2;    /* lower 3 bits indicate nibble */
+
+	/* Set the nibble for VMD queue index */
+	bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex));
+	bits &= (~(0x0F << bitindex));
+	bits |= (vind << bitindex);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits);
+
+	/* Determine the location of the bit for this VLAN id */
+	bitindex = vlan & 0x1F;   /* lower five bits */
+
+	bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
+	if (vlan_on)
+		/* Turn on this VLAN id */
+		bits |= (1 << bitindex);
+	else
+		/* Turn off this VLAN id */
+		bits &= ~(1 << bitindex);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_vfta_82598 - Clear VLAN filter table
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the VLAN filer table, and the VMDq index associated with the filter
+ **/
+static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
+{
+	u32 offset;
+	u32 vlanbyte;
+
+	for (offset = 0; offset < hw->mac.vft_size; offset++)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
+
+	for (vlanbyte = 0; vlanbyte < 4; vlanbyte++)
+		for (offset = 0; offset < hw->mac.vft_size; offset++)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset),
+					0);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: analog register to read
+ *  @val: read value
+ *
+ *  Performs read operation to Atlas analog register specified.
+ **/
+s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+	u32  atlas_ctl;
+
+	IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL,
+			IXGBE_ATLASCTL_WRITE_CMD | (reg << 8));
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+	atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+	*val = (u8)atlas_ctl;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: atlas register to write
+ *  @val: value to write
+ *
+ *  Performs write operation to Atlas analog register specified.
+ **/
+s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+	u32  atlas_ctl;
+
+	atlas_ctl = (reg << 8) | val;
+	IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface.
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to read
+ *  @eeprom_data: value read
+ *
+ *  Performs 8 byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 *eeprom_data)
+{
+	s32 status = 0;
+	u16 sfp_addr = 0;
+	u16 sfp_data = 0;
+	u16 sfp_stat = 0;
+	u32 i;
+
+	if (hw->phy.type == ixgbe_phy_nl) {
+		/*
+		 * NetLogic phy SDA/SCL registers are at addresses 0xC30A to
+		 * 0xC30D. These registers are used to talk to the SFP+
+		 * module's EEPROM through the SDA/SCL (I2C) interface.
+		 */
+		sfp_addr = (IXGBE_I2C_EEPROM_DEV_ADDR << 8) + byte_offset;
+		sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK);
+		hw->phy.ops.write_reg(hw,
+				      IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR,
+				      IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+				      sfp_addr);
+
+		/* Poll status */
+		for (i = 0; i < 100; i++) {
+			hw->phy.ops.read_reg(hw,
+					     IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT,
+					     IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+					     &sfp_stat);
+			sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK;
+			if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS)
+				break;
+			msleep(10);
+		}
+
+		if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) {
+			hw_dbg(hw, "EEPROM read did not pass.\n");
+			status = IXGBE_ERR_SFP_NOT_PRESENT;
+			goto out;
+		}
+
+		/* Read data */
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA,
+				     IXGBE_MDIO_PMA_PMD_DEV_TYPE, &sfp_data);
+
+		*eeprom_data = (u8)(sfp_data >> 8);
+	} else {
+		status = IXGBE_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_get_supported_physical_layer_82598 - Returns physical layer type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw)
+{
+	u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+	u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 pma_pmd_10g = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK;
+	u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+	u16 ext_ability = 0;
+
+	hw->phy.ops.identify(hw);
+
+	/* Copper PHY must be checked before AUTOC LMS to determine correct
+	 * physical layer because 10GBase-T PHYs use LMS = KX4/KX */
+	switch (hw->phy.type) {
+	case ixgbe_phy_tn:
+	case ixgbe_phy_cu_unknown:
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+		IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+		if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+		if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+		if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+		goto out;
+	default:
+		break;
+	}
+
+	switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+	case IXGBE_AUTOC_LMS_1G_AN:
+	case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+		if (pma_pmd_1g == IXGBE_AUTOC_1G_KX)
+			physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+		else
+			physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_BX;
+		break;
+	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+		if (pma_pmd_10g == IXGBE_AUTOC_10G_CX4)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
+		else if (pma_pmd_10g == IXGBE_AUTOC_10G_KX4)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+		else /* XAUI */
+			physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+		break;
+	case IXGBE_AUTOC_LMS_KX4_AN:
+	case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+		break;
+	default:
+		break;
+	}
+
+	if (hw->phy.type == ixgbe_phy_nl) {
+		hw->phy.ops.identify_sfp(hw);
+
+		switch (hw->phy.sfp_type) {
+		case ixgbe_sfp_type_da_cu:
+			physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
+			break;
+		case ixgbe_sfp_type_sr:
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
+			break;
+		case ixgbe_sfp_type_lr:
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
+			break;
+		default:
+			physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+			break;
+		}
+	}
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+		physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
+		break;
+	case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+	case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+	case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+		physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
+		break;
+	case IXGBE_DEV_ID_82598EB_XF_LR:
+		physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
+		break;
+	default:
+		break;
+	}
+
+out:
+	return physical_layer;
+}
+
+/**
+ *  ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple
+ *  port devices.
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls common function and corrects issue with some single port devices
+ *  that enable LAN1 but not LAN0.
+ **/
+void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_bus_info *bus = &hw->bus;
+	u16 pci_gen = 0;
+	u16 pci_ctrl2 = 0;
+
+	ixgbe_set_lan_id_multi_port_pcie(hw);
+
+	/* check if LAN0 is disabled */
+	hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen);
+	if ((pci_gen != 0) && (pci_gen != 0xFFFF)) {
+
+		hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2);
+
+		/* if LAN0 is completely disabled force function to 0 */
+		if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) &&
+		    !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) &&
+		    !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) {
+
+			bus->func = 0;
+		}
+	}
+}
+
+/**
+ * ixgbe_set_rxpba_82598 - Initialize RX packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
+				  u32 headroom, int strategy)
+{
+	u32 rxpktsize = IXGBE_RXPBSIZE_64KB;
+	u8 i = 0;
+
+	if (!num_pb)
+		return;
+
+	/* Setup Rx packet buffer sizes */
+	switch (strategy) {
+	case PBA_STRATEGY_WEIGHTED:
+		/* Setup the first four at 80KB */
+		rxpktsize = IXGBE_RXPBSIZE_80KB;
+		for (; i < 4; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		/* Setup the last four at 48KB...don't re-init i */
+		rxpktsize = IXGBE_RXPBSIZE_48KB;
+		/* Fall Through */
+	case PBA_STRATEGY_EQUAL:
+	default:
+		/* Divide the remaining Rx packet buffer evenly among the TCs */
+		for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		break;
+	}
+
+	/* Setup Tx packet buffer sizes */
+	for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB);
+
+	return;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
new file mode 100644
index 00000000..c6abb020
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
@@ -0,0 +1,44 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_82598_H_
+#define _IXGBE_82598_H_
+
+u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw);
+s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw);
+s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on);
+s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val);
+s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val);
+s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 *eeprom_data);
+u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw);
+s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw);
+void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw);
+void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw);
+#endif /* _IXGBE_82598_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
new file mode 100644
index 00000000..017dfe16
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
@@ -0,0 +1,2313 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_type.h"
+#include "ixgbe_82599.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg,
+					 bool autoneg_wait_to_complete);
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
+static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
+				   u16 offset, u16 *data);
+static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
+					  u16 words, u16 *data);
+static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+					u8 dev_addr, u8 *data);
+static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+					u8 dev_addr, u8 data);
+
+void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	/* enable the laser control functions for SFP+ fiber */
+	if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) {
+		mac->ops.disable_tx_laser =
+				       &ixgbe_disable_tx_laser_multispeed_fiber;
+		mac->ops.enable_tx_laser =
+					&ixgbe_enable_tx_laser_multispeed_fiber;
+		mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber;
+
+	} else {
+		mac->ops.disable_tx_laser = NULL;
+		mac->ops.enable_tx_laser = NULL;
+		mac->ops.flap_tx_laser = NULL;
+	}
+
+	if (hw->phy.multispeed_fiber) {
+		/* Set up dual speed SFP+ support */
+		mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber;
+	} else {
+		if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) &&
+		     (hw->phy.smart_speed == ixgbe_smart_speed_auto ||
+		      hw->phy.smart_speed == ixgbe_smart_speed_on) &&
+		      !ixgbe_verify_lesm_fw_enabled_82599(hw)) {
+			mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed;
+		} else {
+			mac->ops.setup_link = &ixgbe_setup_mac_link_82599;
+		}
+	}
+}
+
+/**
+ *  ixgbe_init_phy_ops_82599 - PHY/SFP specific init
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize any function pointers that were not able to be
+ *  set during init_shared_code because the PHY/SFP type was
+ *  not known.  Perform the SFP init if necessary.
+ *
+ **/
+s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u32 esdp;
+
+	if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) {
+		/* Store flag indicating I2C bus access control unit. */
+		hw->phy.qsfp_shared_i2c_bus = TRUE;
+
+		/* Initialize access to QSFP+ I2C bus */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp |= IXGBE_ESDP_SDP0_DIR;
+		esdp &= ~IXGBE_ESDP_SDP1_DIR;
+		esdp &= ~IXGBE_ESDP_SDP0;
+		esdp &= ~IXGBE_ESDP_SDP0_NATIVE;
+		esdp &= ~IXGBE_ESDP_SDP1_NATIVE;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+
+		phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599;
+		phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599;
+	}
+	/* Identify the PHY or SFP module */
+	ret_val = phy->ops.identify(hw);
+	if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto init_phy_ops_out;
+
+	/* Setup function pointers based on detected SFP module and speeds */
+	ixgbe_init_mac_link_ops_82599(hw);
+	if (hw->phy.sfp_type != ixgbe_sfp_type_unknown)
+		hw->phy.ops.reset = NULL;
+
+	/* If copper media, overwrite with copper function pointers */
+	if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
+		mac->ops.setup_link = &ixgbe_setup_copper_link_82599;
+		mac->ops.get_link_capabilities =
+				  &ixgbe_get_copper_link_capabilities_generic;
+	}
+
+	/* Set necessary function pointers based on phy type */
+	switch (hw->phy.type) {
+	case ixgbe_phy_tn:
+		phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
+		phy->ops.check_link = &ixgbe_check_phy_link_tnx;
+		phy->ops.get_firmware_version =
+			     &ixgbe_get_phy_firmware_version_tnx;
+		break;
+	default:
+		break;
+	}
+init_phy_ops_out:
+	return ret_val;
+}
+
+s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 reg_anlp1 = 0;
+	u32 i = 0;
+	u16 list_offset, data_offset, data_value;
+
+	if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) {
+		ixgbe_init_mac_link_ops_82599(hw);
+
+		hw->phy.ops.reset = NULL;
+
+		ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
+							      &data_offset);
+		if (ret_val != 0)
+			goto setup_sfp_out;
+
+		/* PHY config will finish before releasing the semaphore */
+		ret_val = hw->mac.ops.acquire_swfw_sync(hw,
+							IXGBE_GSSR_MAC_CSR_SM);
+		if (ret_val != 0) {
+			ret_val = IXGBE_ERR_SWFW_SYNC;
+			goto setup_sfp_out;
+		}
+
+		hw->eeprom.ops.read(hw, ++data_offset, &data_value);
+		while (data_value != 0xffff) {
+			IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value);
+			IXGBE_WRITE_FLUSH(hw);
+			hw->eeprom.ops.read(hw, ++data_offset, &data_value);
+		}
+
+		/* Release the semaphore */
+		hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+		/* Delay obtaining semaphore again to allow FW access */
+		msleep(hw->eeprom.semaphore_delay);
+
+		/* Now restart DSP by setting Restart_AN and clearing LMS */
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw,
+				IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) |
+				IXGBE_AUTOC_AN_RESTART));
+
+		/* Wait for AN to leave state 0 */
+		for (i = 0; i < 10; i++) {
+			msleep(4);
+			reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+			if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)
+				break;
+		}
+		if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) {
+			hw_dbg(hw, "sfp module setup not complete\n");
+			ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE;
+			goto setup_sfp_out;
+		}
+
+		/* Restart DSP by setting Restart_AN and return to SFI mode */
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw,
+				IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL |
+				IXGBE_AUTOC_AN_RESTART));
+	}
+
+setup_sfp_out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_init_ops_82599 - Inits func ptrs and MAC type
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the function pointers and assign the MAC type for 82599.
+ *  Does not touch the hardware.
+ **/
+
+s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	s32 ret_val;
+
+	ixgbe_init_phy_ops_generic(hw);
+	ret_val = ixgbe_init_ops_generic(hw);
+
+	/* PHY */
+	phy->ops.identify = &ixgbe_identify_phy_82599;
+	phy->ops.init = &ixgbe_init_phy_ops_82599;
+
+	/* MAC */
+	mac->ops.reset_hw = &ixgbe_reset_hw_82599;
+	mac->ops.get_media_type = &ixgbe_get_media_type_82599;
+	mac->ops.get_supported_physical_layer =
+				    &ixgbe_get_supported_physical_layer_82599;
+	mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic;
+	mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic;
+	mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_82599;
+	mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82599;
+	mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82599;
+	mac->ops.start_hw = &ixgbe_start_hw_82599;
+	mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic;
+	mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic;
+	mac->ops.get_device_caps = &ixgbe_get_device_caps_generic;
+	mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic;
+	mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic;
+
+	/* RAR, Multicast, VLAN */
+	mac->ops.set_vmdq = &ixgbe_set_vmdq_generic;
+	mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic;
+	mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic;
+	mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic;
+	mac->rar_highwater = 1;
+	mac->ops.set_vfta = &ixgbe_set_vfta_generic;
+	mac->ops.set_vlvf = &ixgbe_set_vlvf_generic;
+	mac->ops.clear_vfta = &ixgbe_clear_vfta_generic;
+	mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic;
+	mac->ops.setup_sfp = &ixgbe_setup_sfp_modules_82599;
+	mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing;
+	mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing;
+
+	/* Link */
+	mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82599;
+	mac->ops.check_link = &ixgbe_check_mac_link_generic;
+	mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic;
+	ixgbe_init_mac_link_ops_82599(hw);
+
+	mac->mcft_size		= 128;
+	mac->vft_size		= 128;
+	mac->num_rar_entries	= 128;
+	mac->rx_pb_size		= 512;
+	mac->max_tx_queues	= 128;
+	mac->max_rx_queues	= 128;
+	mac->max_msix_vectors	= ixgbe_get_pcie_msix_count_generic(hw);
+
+	mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) &
+				   IXGBE_FWSM_MODE_MASK) ? true : false;
+
+	//hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf;
+
+	/* EEPROM */
+	eeprom->ops.read = &ixgbe_read_eeprom_82599;
+	eeprom->ops.read_buffer = &ixgbe_read_eeprom_buffer_82599;
+
+	/* Manageability interface */
+	mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic;
+
+	mac->ops.get_thermal_sensor_data =
+					 &ixgbe_get_thermal_sensor_data_generic;
+	mac->ops.init_thermal_sensor_thresh =
+				      &ixgbe_init_thermal_sensor_thresh_generic;
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_link_capabilities_82599 - Determines link capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @negotiation: true when autoneg or autotry is enabled
+ *
+ *  Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
+				      ixgbe_link_speed *speed,
+				      bool *negotiation)
+{
+	s32 status = 0;
+	u32 autoc = 0;
+
+	/* Check if 1G SFP module. */
+	if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*negotiation = true;
+		goto out;
+	}
+
+	/*
+	 * Determine link capabilities based on the stored value of AUTOC,
+	 * which represents EEPROM defaults.  If AUTOC value has not
+	 * been stored, use the current register values.
+	 */
+	if (hw->mac.orig_link_settings_stored)
+		autoc = hw->mac.orig_autoc;
+	else
+		autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+	switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+	case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*negotiation = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		*negotiation = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_1G_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*negotiation = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_10G_SERIAL:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		*negotiation = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_KX4_KX_KR:
+	case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+		if (autoc & IXGBE_AUTOC_KR_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		*negotiation = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII:
+		*speed = IXGBE_LINK_SPEED_100_FULL;
+		if (autoc & IXGBE_AUTOC_KR_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		*negotiation = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_SGMII_1G_100M:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL;
+		*negotiation = false;
+		break;
+
+	default:
+		status = IXGBE_ERR_LINK_SETUP;
+		goto out;
+		break;
+	}
+
+	if (hw->phy.multispeed_fiber) {
+		*speed |= IXGBE_LINK_SPEED_10GB_FULL |
+			  IXGBE_LINK_SPEED_1GB_FULL;
+		*negotiation = true;
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_get_media_type_82599 - Get media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ **/
+enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
+{
+	enum ixgbe_media_type media_type;
+
+	/* Detect if there is a copper PHY attached. */
+	switch (hw->phy.type) {
+	case ixgbe_phy_cu_unknown:
+	case ixgbe_phy_tn:
+		media_type = ixgbe_media_type_copper;
+		goto out;
+	default:
+		break;
+	}
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82599_KX4:
+	case IXGBE_DEV_ID_82599_KX4_MEZZ:
+	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+	case IXGBE_DEV_ID_82599_KR:
+	case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
+	case IXGBE_DEV_ID_82599_XAUI_LOM:
+		/* Default device ID is mezzanine card KX/KX4 */
+		media_type = ixgbe_media_type_backplane;
+		break;
+	case IXGBE_DEV_ID_82599_SFP:
+	case IXGBE_DEV_ID_82599_SFP_FCOE:
+	case IXGBE_DEV_ID_82599_SFP_EM:
+	case IXGBE_DEV_ID_82599_SFP_SF2:
+	case IXGBE_DEV_ID_82599EN_SFP:
+		media_type = ixgbe_media_type_fiber;
+		break;
+	case IXGBE_DEV_ID_82599_CX4:
+		media_type = ixgbe_media_type_cx4;
+		break;
+	case IXGBE_DEV_ID_82599_T3_LOM:
+		media_type = ixgbe_media_type_copper;
+		break;
+	case IXGBE_DEV_ID_82599_LS:
+		media_type = ixgbe_media_type_fiber_lco;
+		break;
+	case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+		media_type = ixgbe_media_type_fiber_qsfp;
+		break;
+	default:
+		media_type = ixgbe_media_type_unknown;
+		break;
+	}
+out:
+	return media_type;
+}
+
+/**
+ *  ixgbe_start_mac_link_82599 - Setup MAC link settings
+ *  @hw: pointer to hardware structure
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Configures link settings based on values in the ixgbe_hw struct.
+ *  Restarts the link.  Performs autonegotiation if needed.
+ **/
+s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+			       bool autoneg_wait_to_complete)
+{
+	u32 autoc_reg;
+	u32 links_reg = 0;
+	u32 i;
+	s32 status = 0;
+
+	/* Restart link */
+	autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+	/* Only poll for autoneg to complete if specified to do so */
+	if (autoneg_wait_to_complete) {
+		if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_KX_KR ||
+		    (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+		    (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+			for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+				links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+				if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+					break;
+				msleep(100);
+			}
+			if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+				status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+				hw_dbg(hw, "Autoneg did not complete.\n");
+			}
+		}
+	}
+
+	/* Add delay to filter out noises during initial link setup */
+	msleep(50);
+
+	return status;
+}
+
+/**
+ *  ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  The base drivers may require better control over SFP+ module
+ *  PHY states.  This includes selectively shutting down the Tx
+ *  laser on the PHY, effectively halting physical link.
+ **/
+void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* Disable tx laser; allow 100us to go dark per spec */
+	esdp_reg |= IXGBE_ESDP_SDP3;
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(100);
+}
+
+/**
+ *  ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  The base drivers may require better control over SFP+ module
+ *  PHY states.  This includes selectively turning on the Tx
+ *  laser on the PHY, effectively starting physical link.
+ **/
+void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* Enable tx laser; allow 100ms to light up */
+	esdp_reg &= ~IXGBE_ESDP_SDP3;
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+	IXGBE_WRITE_FLUSH(hw);
+	msleep(100);
+}
+
+/**
+ *  ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  When the driver changes the link speeds that it can support,
+ *  it sets autotry_restart to true to indicate that we need to
+ *  initiate a new autotry session with the link partner.  To do
+ *  so, we set the speed then disable and re-enable the tx laser, to
+ *  alert the link partner that it also needs to restart autotry on its
+ *  end.  This is consistent with true clause 37 autoneg, which also
+ *  involves a loss of signal.
+ **/
+void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+	if (hw->mac.autotry_restart) {
+		ixgbe_disable_tx_laser_multispeed_fiber(hw);
+		ixgbe_enable_tx_laser_multispeed_fiber(hw);
+		hw->mac.autotry_restart = false;
+	}
+}
+
+/**
+ *  ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Set the link speed in the AUTOC register and restarts link.
+ **/
+s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+				     ixgbe_link_speed speed, bool autoneg,
+				     bool autoneg_wait_to_complete)
+{
+	s32 status = 0;
+	ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+	ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+	u32 speedcnt = 0;
+	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+	u32 i = 0;
+	bool link_up = false;
+	bool negotiation;
+
+	/* Mask off requested but non-supported speeds */
+	status = ixgbe_get_link_capabilities(hw, &link_speed, &negotiation);
+	if (status != 0)
+		return status;
+
+	speed &= link_speed;
+
+	/*
+	 * Try each speed one by one, highest priority first.  We do this in
+	 * software because 10gb fiber doesn't support speed autonegotiation.
+	 */
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+		speedcnt++;
+		highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL;
+
+		/* If we already have link at this speed, just jump out */
+		status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+		if (status != 0)
+			return status;
+
+		if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up)
+			goto out;
+
+		/* Set the module link speed */
+		esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5);
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+		IXGBE_WRITE_FLUSH(hw);
+
+		/* Allow module to change analog characteristics (1G->10G) */
+		msleep(40);
+
+		status = ixgbe_setup_mac_link_82599(hw,
+						    IXGBE_LINK_SPEED_10GB_FULL,
+						    autoneg,
+						    autoneg_wait_to_complete);
+		if (status != 0)
+			return status;
+
+		/* Flap the tx laser if it has not already been done */
+		ixgbe_flap_tx_laser(hw);
+
+		/*
+		 * Wait for the controller to acquire link.  Per IEEE 802.3ap,
+		 * Section 73.10.2, we may have to wait up to 500ms if KR is
+		 * attempted.  82599 uses the same timing for 10g SFI.
+		 */
+		for (i = 0; i < 5; i++) {
+			/* Wait for the link partner to also set speed */
+			msleep(100);
+
+			/* If we have link, just jump out */
+			status = ixgbe_check_link(hw, &link_speed,
+						  &link_up, false);
+			if (status != 0)
+				return status;
+
+			if (link_up)
+				goto out;
+		}
+	}
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+		speedcnt++;
+		if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN)
+			highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+		/* If we already have link at this speed, just jump out */
+		status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+		if (status != 0)
+			return status;
+
+		if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up)
+			goto out;
+
+		/* Set the module link speed */
+		esdp_reg &= ~IXGBE_ESDP_SDP5;
+		esdp_reg |= IXGBE_ESDP_SDP5_DIR;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+		IXGBE_WRITE_FLUSH(hw);
+
+		/* Allow module to change analog characteristics (10G->1G) */
+		msleep(40);
+
+		status = ixgbe_setup_mac_link_82599(hw,
+						    IXGBE_LINK_SPEED_1GB_FULL,
+						    autoneg,
+						    autoneg_wait_to_complete);
+		if (status != 0)
+			return status;
+
+		/* Flap the tx laser if it has not already been done */
+		ixgbe_flap_tx_laser(hw);
+
+		/* Wait for the link partner to also set speed */
+		msleep(100);
+
+		/* If we have link, just jump out */
+		status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+		if (status != 0)
+			return status;
+
+		if (link_up)
+			goto out;
+	}
+
+	/*
+	 * We didn't get link.  Configure back to the highest speed we tried,
+	 * (if there was more than one).  We call ourselves back with just the
+	 * single highest speed that the user requested.
+	 */
+	if (speedcnt > 1)
+		status = ixgbe_setup_mac_link_multispeed_fiber(hw,
+			highest_link_speed, autoneg, autoneg_wait_to_complete);
+
+out:
+	/* Set autoneg_advertised value based on input link speed */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	return status;
+}
+
+/**
+ *  ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Implements the Intel SmartSpeed algorithm.
+ **/
+s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+				    ixgbe_link_speed speed, bool autoneg,
+				    bool autoneg_wait_to_complete)
+{
+	s32 status = 0;
+	ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+	s32 i, j;
+	bool link_up = false;
+	u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+	 /* Set autoneg_advertised value based on input link speed */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+	/*
+	 * Implement Intel SmartSpeed algorithm.  SmartSpeed will reduce the
+	 * autoneg advertisement if link is unable to be established at the
+	 * highest negotiated rate.  This can sometimes happen due to integrity
+	 * issues with the physical media connection.
+	 */
+
+	/* First, try to get link with full advertisement */
+	hw->phy.smart_speed_active = false;
+	for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) {
+		status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
+						    autoneg_wait_to_complete);
+		if (status != 0)
+			goto out;
+
+		/*
+		 * Wait for the controller to acquire link.  Per IEEE 802.3ap,
+		 * Section 73.10.2, we may have to wait up to 500ms if KR is
+		 * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per
+		 * Table 9 in the AN MAS.
+		 */
+		for (i = 0; i < 5; i++) {
+			msleep(100);
+
+			/* If we have link, just jump out */
+			status = ixgbe_check_link(hw, &link_speed, &link_up,
+						  false);
+			if (status != 0)
+				goto out;
+
+			if (link_up)
+				goto out;
+		}
+	}
+
+	/*
+	 * We didn't get link.  If we advertised KR plus one of KX4/KX
+	 * (or BX4/BX), then disable KR and try again.
+	 */
+	if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) ||
+	    ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0))
+		goto out;
+
+	/* Turn SmartSpeed on to disable KR support */
+	hw->phy.smart_speed_active = true;
+	status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
+					    autoneg_wait_to_complete);
+	if (status != 0)
+		goto out;
+
+	/*
+	 * Wait for the controller to acquire link.  600ms will allow for
+	 * the AN link_fail_inhibit_timer as well for multiple cycles of
+	 * parallel detect, both 10g and 1g. This allows for the maximum
+	 * connect attempts as defined in the AN MAS table 73-7.
+	 */
+	for (i = 0; i < 6; i++) {
+		msleep(100);
+
+		/* If we have link, just jump out */
+		status = ixgbe_check_link(hw, &link_speed, &link_up, false);
+		if (status != 0)
+			goto out;
+
+		if (link_up)
+			goto out;
+	}
+
+	/* We didn't get link.  Turn SmartSpeed back off. */
+	hw->phy.smart_speed_active = false;
+	status = ixgbe_setup_mac_link_82599(hw, speed, autoneg,
+					    autoneg_wait_to_complete);
+
+out:
+	if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL))
+		hw_dbg(hw, "Smartspeed has downgraded the link speed "
+		"from the maximum advertised\n");
+	return status;
+}
+
+/**
+ *  ixgbe_setup_mac_link_82599 - Set MAC link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Set the link speed in the AUTOC register and restarts link.
+ **/
+s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
+			       ixgbe_link_speed speed, bool autoneg,
+			       bool autoneg_wait_to_complete)
+{
+	s32 status = 0;
+	u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	u32 start_autoc = autoc;
+	u32 orig_autoc = 0;
+	u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
+	u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+	u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
+	u32 links_reg = 0;
+	u32 i;
+	ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+
+	/* Check to see if speed passed in is supported. */
+	status = ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg);
+	if (status != 0)
+		goto out;
+
+	speed &= link_capabilities;
+
+	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
+		status = IXGBE_ERR_LINK_SETUP;
+		goto out;
+	}
+
+	/* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/
+	if (hw->mac.orig_link_settings_stored)
+		orig_autoc = hw->mac.orig_autoc;
+	else
+		orig_autoc = autoc;
+
+	if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+		/* Set KX4/KX/KR support according to speed requested */
+		autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP);
+		if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+			if (orig_autoc & IXGBE_AUTOC_KX4_SUPP)
+				autoc |= IXGBE_AUTOC_KX4_SUPP;
+			if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) &&
+			    (hw->phy.smart_speed_active == false))
+				autoc |= IXGBE_AUTOC_KR_SUPP;
+		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+			autoc |= IXGBE_AUTOC_KX_SUPP;
+	} else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) &&
+		   (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN ||
+		    link_mode == IXGBE_AUTOC_LMS_1G_AN)) {
+		/* Switch from 1G SFI to 10G SFI if requested */
+		if ((speed == IXGBE_LINK_SPEED_10GB_FULL) &&
+		    (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) {
+			autoc &= ~IXGBE_AUTOC_LMS_MASK;
+			autoc |= IXGBE_AUTOC_LMS_10G_SERIAL;
+		}
+	} else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) &&
+		   (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) {
+		/* Switch from 10G SFI to 1G SFI if requested */
+		if ((speed == IXGBE_LINK_SPEED_1GB_FULL) &&
+		    (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) {
+			autoc &= ~IXGBE_AUTOC_LMS_MASK;
+			if (autoneg)
+				autoc |= IXGBE_AUTOC_LMS_1G_AN;
+			else
+				autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN;
+		}
+	}
+
+	if (autoc != start_autoc) {
+		/* Restart link */
+		autoc |= IXGBE_AUTOC_AN_RESTART;
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
+
+		/* Only poll for autoneg to complete if specified to do so */
+		if (autoneg_wait_to_complete) {
+			if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+			    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+			    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+				for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+					links_reg =
+					       IXGBE_READ_REG(hw, IXGBE_LINKS);
+					if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+						break;
+					msleep(100);
+				}
+				if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+					status =
+						IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+					hw_dbg(hw, "Autoneg did not complete.\n");
+				}
+			}
+		}
+
+		/* Add delay to filter out noises during initial link setup */
+		msleep(50);
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true if waiting is needed to complete
+ *
+ *  Restarts link on PHY and MAC based on settings passed in.
+ **/
+static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg,
+					 bool autoneg_wait_to_complete)
+{
+	s32 status;
+
+	/* Setup the PHY according to input speed */
+	status = hw->phy.ops.setup_link_speed(hw, speed, autoneg,
+					      autoneg_wait_to_complete);
+	/* Set up MAC */
+	ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete);
+
+	return status;
+}
+
+/**
+ *  ixgbe_reset_hw_82599 - Perform hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks
+ *  and clears all interrupts, perform a PHY reset, and perform a link (MAC)
+ *  reset.
+ **/
+s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
+{
+//	ixgbe_link_speed link_speed;
+	s32 status = 0;
+//	u32 ctrl, i, autoc, autoc2;
+//	bool link_up = false;
+
+#if 0
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status != 0)
+		goto reset_hw_out;
+
+	/* flush pending Tx transactions */
+	ixgbe_clear_tx_pending(hw);
+
+	/* PHY ops must be identified and initialized prior to reset */
+
+	/* Identify PHY and related function pointers */
+	status = hw->phy.ops.init(hw);
+
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto reset_hw_out;
+
+	/* Setup SFP module if there is one present. */
+	if (hw->phy.sfp_setup_needed) {
+		status = hw->mac.ops.setup_sfp(hw);
+		hw->phy.sfp_setup_needed = false;
+	}
+
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto reset_hw_out;
+
+	/* Reset PHY */
+	if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
+		hw->phy.ops.reset(hw);
+
+mac_reset_top:
+	/*
+	 * Issue global reset to the MAC.  Needs to be SW reset if link is up.
+	 * If link reset is used when link is up, it might reset the PHY when
+	 * mng is using it.  If link is down or the flag to force full link
+	 * reset is set, then perform link reset.
+	 */
+	ctrl = IXGBE_CTRL_LNK_RST;
+	if (!hw->force_full_reset) {
+		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+		if (link_up)
+			ctrl = IXGBE_CTRL_RST;
+	}
+
+	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Poll for reset bit to self-clear indicating reset is complete */
+	for (i = 0; i < 10; i++) {
+		udelay(1);
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST_MASK))
+			break;
+	}
+
+	if (ctrl & IXGBE_CTRL_RST_MASK) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+
+	msleep(50);
+
+	/*
+	 * Double resets are required for recovery from certain error
+	 * conditions.  Between resets, it is necessary to stall to allow time
+	 * for any pending HW events to complete.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	/*
+	 * Store the original AUTOC/AUTOC2 values if they have not been
+	 * stored off yet.  Otherwise restore the stored original
+	 * values since the reset operation sets back to defaults.
+	 */
+	autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	if (hw->mac.orig_link_settings_stored == false) {
+		hw->mac.orig_autoc = autoc;
+		hw->mac.orig_autoc2 = autoc2;
+		hw->mac.orig_link_settings_stored = true;
+	} else {
+		if (autoc != hw->mac.orig_autoc)
+			IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc |
+					IXGBE_AUTOC_AN_RESTART));
+
+		if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
+		    (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
+			autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
+			autoc2 |= (hw->mac.orig_autoc2 &
+				   IXGBE_AUTOC2_UPPER_MASK);
+			IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
+		}
+	}
+#endif
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/*
+	 * Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table.  Also reset num_rar_entries to 128,
+	 * since we modify this value when programming the SAN MAC address.
+	 */
+	hw->mac.num_rar_entries = 128;
+	hw->mac.ops.init_rx_addrs(hw);
+
+	/* Store the permanent SAN mac address */
+	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
+
+	/* Add the SAN MAC address to the RAR only if it's a valid address */
+	if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
+		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+		/* Save the SAN MAC RAR index */
+		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
+		/* Reserve the last RAR for the SAN MAC address */
+		hw->mac.num_rar_entries--;
+	}
+
+	/* Store the alternative WWNN/WWPN prefix */
+	hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
+				   &hw->mac.wwpn_prefix);
+
+//reset_hw_out:
+	return status;
+}
+
+/**
+ *  ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables.
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
+{
+	int i;
+	u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
+	fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
+
+	/*
+	 * Before starting reinitialization process,
+	 * FDIRCMD.CMD must be zero.
+	 */
+	for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) {
+		if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+		      IXGBE_FDIRCMD_CMD_MASK))
+			break;
+		udelay(10);
+	}
+	if (i >= IXGBE_FDIRCMD_CMD_POLL) {
+		hw_dbg(hw, "Flow Director previous command isn't complete, "
+			 "aborting table re-initialization.\n");
+		return IXGBE_ERR_FDIR_REINIT_FAILED;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0);
+	IXGBE_WRITE_FLUSH(hw);
+	/*
+	 * 82599 adapters flow director init flow cannot be restarted,
+	 * Workaround 82599 silicon errata by performing the following steps
+	 * before re-writing the FDIRCTRL control register with the same value.
+	 * - write 1 to bit 8 of FDIRCMD register &
+	 * - write 0 to bit 8 of FDIRCMD register
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+			(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
+			 IXGBE_FDIRCMD_CLEARHT));
+	IXGBE_WRITE_FLUSH(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+			(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+			 ~IXGBE_FDIRCMD_CLEARHT));
+	IXGBE_WRITE_FLUSH(hw);
+	/*
+	 * Clear FDIR Hash register to clear any leftover hashes
+	 * waiting to be programmed.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00);
+	IXGBE_WRITE_FLUSH(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Poll init-done after we write FDIRCTRL register */
+	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+				   IXGBE_FDIRCTRL_INIT_DONE)
+			break;
+		udelay(10);
+	}
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL) {
+		hw_dbg(hw, "Flow Director Signature poll time exceeded!\n");
+		return IXGBE_ERR_FDIR_REINIT_FAILED;
+	}
+
+	/* Clear FDIR statistics registers (read to clear) */
+	IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT);
+	IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT);
+	IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+	IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+	IXGBE_READ_REG(hw, IXGBE_FDIRLEN);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_fdir_enable_82599 - Initialize Flow Director control registers
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register
+ **/
+static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	int i;
+
+	/* Prime the keys for hashing */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
+
+	/*
+	 * Poll init-done after we write the register.  Estimated times:
+	 *      10G: PBALLOC = 11b, timing is 60us
+	 *       1G: PBALLOC = 11b, timing is 600us
+	 *     100M: PBALLOC = 11b, timing is 6ms
+	 *
+	 *     Multiple these timings by 4 if under full Rx load
+	 *
+	 * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for
+	 * 1 msec per poll time.  If we're at line rate and drop to 100M, then
+	 * this might not finish in our poll time, but we can live with that
+	 * for now.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+	IXGBE_WRITE_FLUSH(hw);
+	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+				   IXGBE_FDIRCTRL_INIT_DONE)
+			break;
+		msleep(1);
+	}
+
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
+		hw_dbg(hw, "Flow Director poll time exceeded!\n");
+}
+
+/**
+ *  ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *	     contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	/*
+	 * Continue setup of fdirctrl register bits:
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 filters are left
+	 */
+	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
+
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *	     contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	/*
+	 * Continue setup of fdirctrl register bits:
+	 *  Turn perfect match filtering on
+	 *  Report hash in RSS field of Rx wb descriptor
+	 *  Initialize the drop queue
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 (0x4 * 16) filters are left
+	 */
+	fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH |
+		    IXGBE_FDIRCTRL_REPORT_STATUS |
+		    (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) |
+		    (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
+
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
+
+	return 0;
+}
+
+/*
+ * These defines allow us to quickly generate all of the necessary instructions
+ * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION
+ * for values 0 through 15
+ */
+#define IXGBE_ATR_COMMON_HASH_KEY \
+		(IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY)
+#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
+do { \
+	u32 n = (_n); \
+	if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \
+		common_hash ^= lo_hash_dword >> n; \
+	else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+		bucket_hash ^= lo_hash_dword >> n; \
+	else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \
+		sig_hash ^= lo_hash_dword << (16 - n); \
+	if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \
+		common_hash ^= hi_hash_dword >> n; \
+	else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+		bucket_hash ^= hi_hash_dword >> n; \
+	else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \
+		sig_hash ^= hi_hash_dword << (16 - n); \
+} while (0);
+
+/**
+ *  ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
+ *  @stream: input bitstream to compute the hash on
+ *
+ *  This function is almost identical to the function above but contains
+ *  several optomizations such as unwinding all of the loops, letting the
+ *  compiler work out all of the conditional ifs since the keys are static
+ *  defines, and computing two keys at once since the hashed dword stream
+ *  will be the same for both keys.
+ **/
+u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
+				     union ixgbe_atr_hash_dword common)
+{
+	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+	u32 sig_hash = 0, bucket_hash = 0, common_hash = 0;
+
+	/* record the flow_vm_vlan bits as they are a key part to the hash */
+	flow_vm_vlan = IXGBE_NTOHL(input.dword);
+
+	/* generate common hash dword */
+	hi_hash_dword = IXGBE_NTOHL(common.dword);
+
+	/* low dword is word swapped version of common */
+	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+	/* apply flow ID/VM pool/VLAN ID bits to hash words */
+	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+	/* Process bits 0 and 16 */
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(0);
+
+	/*
+	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+	 * delay this because bit 0 of the stream should not be processed
+	 * so we do not add the vlan until after bit 0 was processed
+	 */
+	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+	/* Process remaining 30 bit of the key */
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(1);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(2);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(3);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(4);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(5);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(6);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(7);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(8);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(9);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(10);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(11);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(12);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(13);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(14);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(15);
+
+	/* combine common_hash result with signature and bucket hashes */
+	bucket_hash ^= common_hash;
+	bucket_hash &= IXGBE_ATR_HASH_MASK;
+
+	sig_hash ^= common_hash << 16;
+	sig_hash &= IXGBE_ATR_HASH_MASK << 16;
+
+	/* return completed signature hash */
+	return sig_hash ^ bucket_hash;
+}
+
+/**
+ *  ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter
+ *  @hw: pointer to hardware structure
+ *  @input: unique input dword
+ *  @common: compressed common input dword
+ *  @queue: queue index to direct traffic to
+ **/
+s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_hash_dword input,
+					  union ixgbe_atr_hash_dword common,
+					  u8 queue)
+{
+	u64  fdirhashcmd;
+	u32  fdircmd;
+
+	/*
+	 * Get the flow_type in order to program FDIRCMD properly
+	 * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
+	 */
+	switch (input.formatted.flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+	case IXGBE_ATR_FLOW_TYPE_TCPV6:
+	case IXGBE_ATR_FLOW_TYPE_UDPV6:
+	case IXGBE_ATR_FLOW_TYPE_SCTPV6:
+		break;
+	default:
+		hw_dbg(hw, " Error on flow type input\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	/* configure FDIRCMD register */
+	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+	fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+
+	/*
+	 * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits
+	 * is for FDIRCMD.  Then do a 64-bit register write from FDIRHASH.
+	 */
+	fdirhashcmd = (u64)fdircmd << 32;
+	fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
+	IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
+
+	hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
+
+	return 0;
+}
+
+#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
+do { \
+	u32 n = (_n); \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \
+		bucket_hash ^= lo_hash_dword >> n; \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \
+		bucket_hash ^= hi_hash_dword >> n; \
+} while (0);
+
+/**
+ *  ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
+ *  @atr_input: input bitstream to compute the hash on
+ *  @input_mask: mask for the input bitstream
+ *
+ *  This function serves two main purposes.  First it applys the input_mask
+ *  to the atr_input resulting in a cleaned up atr_input data stream.
+ *  Secondly it computes the hash and stores it in the bkt_hash field at
+ *  the end of the input byte stream.  This way it will be available for
+ *  future use without needing to recompute the hash.
+ **/
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+					  union ixgbe_atr_input *input_mask)
+{
+
+	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+	u32 bucket_hash = 0;
+
+	/* Apply masks to input data */
+	input->dword_stream[0]  &= input_mask->dword_stream[0];
+	input->dword_stream[1]  &= input_mask->dword_stream[1];
+	input->dword_stream[2]  &= input_mask->dword_stream[2];
+	input->dword_stream[3]  &= input_mask->dword_stream[3];
+	input->dword_stream[4]  &= input_mask->dword_stream[4];
+	input->dword_stream[5]  &= input_mask->dword_stream[5];
+	input->dword_stream[6]  &= input_mask->dword_stream[6];
+	input->dword_stream[7]  &= input_mask->dword_stream[7];
+	input->dword_stream[8]  &= input_mask->dword_stream[8];
+	input->dword_stream[9]  &= input_mask->dword_stream[9];
+	input->dword_stream[10] &= input_mask->dword_stream[10];
+
+	/* record the flow_vm_vlan bits as they are a key part to the hash */
+	flow_vm_vlan = IXGBE_NTOHL(input->dword_stream[0]);
+
+	/* generate common hash dword */
+	hi_hash_dword = IXGBE_NTOHL(input->dword_stream[1] ^
+				    input->dword_stream[2] ^
+				    input->dword_stream[3] ^
+				    input->dword_stream[4] ^
+				    input->dword_stream[5] ^
+				    input->dword_stream[6] ^
+				    input->dword_stream[7] ^
+				    input->dword_stream[8] ^
+				    input->dword_stream[9] ^
+				    input->dword_stream[10]);
+
+	/* low dword is word swapped version of common */
+	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+	/* apply flow ID/VM pool/VLAN ID bits to hash words */
+	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+	/* Process bits 0 and 16 */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(0);
+
+	/*
+	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+	 * delay this because bit 0 of the stream should not be processed
+	 * so we do not add the vlan until after bit 0 was processed
+	 */
+	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+	/* Process remaining 30 bit of the key */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(1);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(2);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(3);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(4);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(5);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(6);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(7);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(8);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(9);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(10);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(11);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(12);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(13);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(14);
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(15);
+
+	/*
+	 * Limit hash to 13 bits since max bucket count is 8K.
+	 * Store result at the end of the input stream.
+	 */
+	input->formatted.bkt_hash = bucket_hash & 0x1FFF;
+}
+
+/**
+ *  ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
+ *  @input_mask: mask to be bit swapped
+ *
+ *  The source and destination port masks for flow director are bit swapped
+ *  in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc.  In order to
+ *  generate a correctly swapped value we need to bit swap the mask and that
+ *  is what is accomplished by this function.
+ **/
+static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
+{
+	u32 mask = IXGBE_NTOHS(input_mask->formatted.dst_port);
+	mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
+	mask |= IXGBE_NTOHS(input_mask->formatted.src_port);
+	mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
+	mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
+	mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
+	return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8);
+}
+
+/*
+ * These two macros are meant to address the fact that we have registers
+ * that are either all or in part big-endian.  As a result on big-endian
+ * systems we will end up byte swapping the value to little-endian before
+ * it is byte swapped again and written to the hardware in the original
+ * big-endian format.
+ */
+#define IXGBE_STORE_AS_BE32(_value) \
+	(((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \
+	 (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24))
+
+#define IXGBE_WRITE_REG_BE32(a, reg, value) \
+	IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(IXGBE_NTOHL(value)))
+
+#define IXGBE_STORE_AS_BE16(_value) \
+	IXGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8))
+
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+				    union ixgbe_atr_input *input_mask)
+{
+	/* mask IPv6 since it is currently not supported */
+	u32 fdirm = IXGBE_FDIRM_DIPv6;
+	u32 fdirtcpm;
+
+	/*
+	 * Program the relevant mask registers.  If src/dst_port or src/dst_addr
+	 * are zero, then assume a full mask for that field.  Also assume that
+	 * a VLAN of 0 is unspecified, so mask that out as well.  L4type
+	 * cannot be masked out in this implementation.
+	 *
+	 * This also assumes IPv4 only.  IPv6 masking isn't supported at this
+	 * point in time.
+	 */
+
+	/* verify bucket hash is cleared on hash generation */
+	if (input_mask->formatted.bkt_hash)
+		hw_dbg(hw, " bucket hash should always be 0 in mask\n");
+
+	/* Program FDIRM and verify partial masks */
+	switch (input_mask->formatted.vm_pool & 0x7F) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_POOL;
+	case 0x7F:
+		break;
+	default:
+		hw_dbg(hw, " Error on vm pool mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_L4P;
+		if (input_mask->formatted.dst_port ||
+		    input_mask->formatted.src_port) {
+			hw_dbg(hw, " Error on src/dst port mask\n");
+			return IXGBE_ERR_CONFIG;
+		}
+	case IXGBE_ATR_L4TYPE_MASK:
+		break;
+	default:
+		hw_dbg(hw, " Error on flow type mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (IXGBE_NTOHS(input_mask->formatted.vlan_id) & 0xEFFF) {
+	case 0x0000:
+		/* mask VLAN ID, fall through to mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANID;
+	case 0x0FFF:
+		/* mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANP;
+		break;
+	case 0xE000:
+		/* mask VLAN ID only, fall through */
+		fdirm |= IXGBE_FDIRM_VLANID;
+	case 0xEFFF:
+		/* no VLAN fields masked */
+		break;
+	default:
+		hw_dbg(hw, " Error on VLAN mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (input_mask->formatted.flex_bytes & 0xFFFF) {
+	case 0x0000:
+		/* Mask Flex Bytes, fall through */
+		fdirm |= IXGBE_FDIRM_FLEX;
+	case 0xFFFF:
+		break;
+	default:
+		hw_dbg(hw, " Error on flexible byte mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	/* Now mask VM pool and destination IPv6 - bits 5 and 2 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
+
+	/* store the TCP/UDP port masks, bit reversed from port layout */
+	fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask);
+
+	/* write both the same so that UDP and TCP use the same mask */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm);
+
+	/* store source and destination IP masks (big-enian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
+			     ~input_mask->formatted.src_ip[0]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
+			     ~input_mask->formatted.dst_ip[0]);
+
+	return 0;
+}
+
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id, u8 queue)
+{
+	u32 fdirport, fdirvlan, fdirhash, fdircmd;
+
+	/* currently IPv6 is not supported, must be programmed with 0 */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
+			     input->formatted.src_ip[0]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1),
+			     input->formatted.src_ip[1]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2),
+			     input->formatted.src_ip[2]);
+
+	/* record the source address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+
+	/* record the first 32 bits of the destination address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
+
+	/* record source and destination port (little-endian)*/
+	fdirport = IXGBE_NTOHS(input->formatted.dst_port);
+	fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT;
+	fdirport |= IXGBE_NTOHS(input->formatted.src_port);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
+
+	/* record vlan (little-endian) and flex_bytes(big-endian) */
+	fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
+	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
+	fdirvlan |= IXGBE_NTOHS(input->formatted.vlan_id);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
+
+	/* configure FDIRHASH register */
+	fdirhash = input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/*
+	 * flush all previous writes to make certain registers are
+	 * programmed prior to issuing the command
+	 */
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* configure FDIRCMD register */
+	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+	if (queue == IXGBE_FDIR_DROP_QUEUE)
+		fdircmd |= IXGBE_FDIRCMD_DROP;
+	fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+	fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT;
+
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
+
+	return 0;
+}
+
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id)
+{
+	u32 fdirhash;
+	u32 fdircmd = 0;
+	u32 retry_count;
+	s32 err = 0;
+
+	/* configure FDIRHASH register */
+	fdirhash = input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/* flush hash to HW */
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Query if filter is present */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT);
+
+	for (retry_count = 10; retry_count; retry_count--) {
+		/* allow 10us for query to process */
+		udelay(10);
+		/* verify query completed successfully */
+		fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD);
+		if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK))
+			break;
+	}
+
+	if (!retry_count)
+		err = IXGBE_ERR_FDIR_REINIT_FAILED;
+
+	/* if filter exists in hardware then remove it */
+	if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) {
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+		IXGBE_WRITE_FLUSH(hw);
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+				IXGBE_FDIRCMD_CMD_REMOVE_FLOW);
+	}
+
+	return err;
+}
+
+/**
+ *  ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter
+ *  @hw: pointer to hardware structure
+ *  @input: input bitstream
+ *  @input_mask: mask for the input bitstream
+ *  @soft_id: software index for the filters
+ *  @queue: queue index to direct traffic to
+ *
+ *  Note that the caller to this function must lock before calling, since the
+ *  hardware writes must be protected from one another.
+ **/
+s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
+					union ixgbe_atr_input *input,
+					union ixgbe_atr_input *input_mask,
+					u16 soft_id, u8 queue)
+{
+	s32 err = IXGBE_ERR_CONFIG;
+
+	/*
+	 * Check flow_type formatting, and bail out before we touch the hardware
+	 * if there's a configuration issue
+	 */
+	switch (input->formatted.flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_IPV4:
+		input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK;
+		if (input->formatted.dst_port || input->formatted.src_port) {
+			hw_dbg(hw, " Error on src/dst port\n");
+			return IXGBE_ERR_CONFIG;
+		}
+		break;
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+		if (input->formatted.dst_port || input->formatted.src_port) {
+			hw_dbg(hw, " Error on src/dst port\n");
+			return IXGBE_ERR_CONFIG;
+		}
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+		input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+						  IXGBE_ATR_L4TYPE_MASK;
+		break;
+	default:
+		hw_dbg(hw, " Error on flow type input\n");
+		return err;
+	}
+
+	/* program input mask into the HW */
+	err = ixgbe_fdir_set_input_mask_82599(hw, input_mask);
+	if (err)
+		return err;
+
+	/* apply mask and compute/store hash */
+	ixgbe_atr_compute_perfect_hash_82599(input, input_mask);
+
+	/* program filters to filter memory */
+	return ixgbe_fdir_write_perfect_filter_82599(hw, input,
+						     soft_id, queue);
+}
+
+/**
+ *  ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: analog register to read
+ *  @val: read value
+ *
+ *  Performs read operation to Omer analog register specified.
+ **/
+s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+	u32  core_ctl;
+
+	IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD |
+			(reg << 8));
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+	core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL);
+	*val = (u8)core_ctl;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: atlas register to write
+ *  @val: value to write
+ *
+ *  Performs write operation to Omer analog register specified.
+ **/
+s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+	u32  core_ctl;
+
+	core_ctl = (reg << 8) | val;
+	IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function
+ *  and the generation start_hw function.
+ *  Then performs revision-specific operations, if any.
+ **/
+s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+
+	ret_val = ixgbe_start_hw_generic(hw);
+	if (ret_val != 0)
+		goto out;
+
+	ret_val = ixgbe_start_hw_gen2(hw);
+	if (ret_val != 0)
+		goto out;
+
+	/* We need to run link autotry after the driver loads */
+	hw->mac.autotry_restart = true;
+
+	if (ret_val == 0)
+		ret_val = ixgbe_verify_fw_version_82599(hw);
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_identify_phy_82599 - Get physical layer module
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines the physical layer module found on the current adapter.
+ *  If PHY already detected, maintains current PHY type in hw struct,
+ *  otherwise executes the PHY detection routine.
+ **/
+s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+
+	/* Detect PHY if not unknown - returns success if already detected. */
+	status = ixgbe_identify_phy_generic(hw);
+	if (status != 0) {
+		/* 82599 10GBASE-T requires an external PHY */
+		if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper)
+			goto out;
+		else
+			status = ixgbe_identify_module_generic(hw);
+	}
+
+	/* Set PHY type none if no PHY detected */
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		hw->phy.type = ixgbe_phy_none;
+		status = 0;
+	}
+
+	/* Return error if SFP module has been detected but is not supported */
+	if (hw->phy.type == ixgbe_phy_sfp_unsupported)
+		status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_get_supported_physical_layer_82599 - Returns physical layer type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw)
+{
+	u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+	u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
+	u32 pma_pmd_10g_parallel = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK;
+	u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+	u16 ext_ability = 0;
+	u8 comp_codes_10g = 0;
+	u8 comp_codes_1g = 0;
+
+	hw->phy.ops.identify(hw);
+
+	switch (hw->phy.type) {
+	case ixgbe_phy_tn:
+	case ixgbe_phy_cu_unknown:
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+		IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+		if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+		if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+		if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+		goto out;
+	default:
+		break;
+	}
+
+	switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+	case IXGBE_AUTOC_LMS_1G_AN:
+	case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+		if (pma_pmd_1g == IXGBE_AUTOC_1G_KX_BX) {
+			physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX |
+			    IXGBE_PHYSICAL_LAYER_1000BASE_BX;
+			goto out;
+		} else
+			/* SFI mode so read SFP module */
+			goto sfp_check;
+		break;
+	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+		if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4;
+		else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+		else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI;
+		goto out;
+		break;
+	case IXGBE_AUTOC_LMS_10G_SERIAL:
+		if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) {
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR;
+			goto out;
+		} else if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)
+			goto sfp_check;
+		break;
+	case IXGBE_AUTOC_LMS_KX4_KX_KR:
+	case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4;
+		if (autoc & IXGBE_AUTOC_KR_SUPP)
+			physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR;
+		goto out;
+		break;
+	default:
+		goto out;
+		break;
+	}
+
+sfp_check:
+	/* SFP check must be done last since DA modules are sometimes used to
+	 * test KR mode -  we need to id KR mode correctly before SFP module.
+	 * Call identify_sfp because the pluggable module may have changed */
+	hw->phy.ops.identify_sfp(hw);
+	if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
+		goto out;
+
+	switch (hw->phy.type) {
+	case ixgbe_phy_sfp_passive_tyco:
+	case ixgbe_phy_sfp_passive_unknown:
+		physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU;
+		break;
+	case ixgbe_phy_sfp_ftl_active:
+	case ixgbe_phy_sfp_active_unknown:
+		physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA;
+		break;
+	case ixgbe_phy_sfp_avago:
+	case ixgbe_phy_sfp_ftl:
+	case ixgbe_phy_sfp_intel:
+	case ixgbe_phy_sfp_unknown:
+		hw->phy.ops.read_i2c_eeprom(hw,
+		      IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g);
+		hw->phy.ops.read_i2c_eeprom(hw,
+		      IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g);
+		if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR;
+		else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
+			physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR;
+		else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE)
+			physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_T;
+		else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE)
+			physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_SX;
+		break;
+	default:
+		break;
+	}
+
+out:
+	return physical_layer;
+}
+
+/**
+ *  ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599
+ *  @hw: pointer to hardware structure
+ *  @regval: register value to write to RXCTRL
+ *
+ *  Enables the Rx DMA unit for 82599
+ **/
+s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
+{
+
+	/*
+	 * Workaround for 82599 silicon errata when enabling the Rx datapath.
+	 * If traffic is incoming before we enable the Rx unit, it could hang
+	 * the Rx DMA unit.  Therefore, make sure the security engine is
+	 * completely disabled prior to enabling the Rx unit.
+	 */
+
+	hw->mac.ops.disable_sec_rx_path(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval);
+
+	hw->mac.ops.enable_sec_rx_path(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_verify_fw_version_82599 - verify fw version for 82599
+ *  @hw: pointer to hardware structure
+ *
+ *  Verifies that installed the firmware version is 0.6 or higher
+ *  for SFI devices. All 82599 SFI devices should have version 0.6 or higher.
+ *
+ *  Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or
+ *  if the FW version is not supported.
+ **/
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_EEPROM_VERSION;
+	u16 fw_offset, fw_ptp_cfg_offset;
+	u16 fw_version = 0;
+
+	/* firmware check is only necessary for SFI devices */
+	if (hw->phy.media_type != ixgbe_media_type_fiber) {
+		status = 0;
+		goto fw_version_out;
+	}
+
+	/* get the offset to the Firmware Module block */
+	hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+
+	if ((fw_offset == 0) || (fw_offset == 0xFFFF))
+		goto fw_version_out;
+
+	/* get the offset to the Pass Through Patch Configuration block */
+	hw->eeprom.ops.read(hw, (fw_offset +
+				 IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR),
+				 &fw_ptp_cfg_offset);
+
+	if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF))
+		goto fw_version_out;
+
+	/* get the firmware version */
+	hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset +
+			    IXGBE_FW_PATCH_VERSION_4), &fw_version);
+
+	if (fw_version > 0x5)
+		status = 0;
+
+fw_version_out:
+	return status;
+}
+
+/**
+ *  ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state.
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns true if the LESM FW module is present and enabled. Otherwise
+ *  returns false. Smart Speed must be disabled if LESM FW module is enabled.
+ **/
+bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw)
+{
+	bool lesm_enabled = false;
+	u16 fw_offset, fw_lesm_param_offset, fw_lesm_state;
+	s32 status;
+
+	/* get the offset to the Firmware Module block */
+	status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+
+	if ((status != 0) ||
+	    (fw_offset == 0) || (fw_offset == 0xFFFF))
+		goto out;
+
+	/* get the offset to the LESM Parameters block */
+	status = hw->eeprom.ops.read(hw, (fw_offset +
+				     IXGBE_FW_LESM_PARAMETERS_PTR),
+				     &fw_lesm_param_offset);
+
+	if ((status != 0) ||
+	    (fw_lesm_param_offset == 0) || (fw_lesm_param_offset == 0xFFFF))
+		goto out;
+
+	/* get the lesm state word */
+	status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset +
+				     IXGBE_FW_LESM_STATE_1),
+				     &fw_lesm_state);
+
+	if ((status == 0) &&
+	    (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED))
+		lesm_enabled = true;
+
+out:
+	return lesm_enabled;
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using
+ *  fastest available method
+ *
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in EEPROM to read
+ *  @words: number of words
+ *  @data: word(s) read from the EEPROM
+ *
+ *  Retrieves 16 bit word(s) read from EEPROM
+ **/
+static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
+					  u16 words, u16 *data)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	s32 ret_val = IXGBE_ERR_CONFIG;
+
+	/*
+	 * If EEPROM is detected and can be addressed using 14 bits,
+	 * use EERD otherwise use bit bang
+	 */
+	if ((eeprom->type == ixgbe_eeprom_spi) &&
+	    (offset + (words - 1) <= IXGBE_EERD_MAX_ADDR))
+		ret_val = ixgbe_read_eerd_buffer_generic(hw, offset, words,
+							 data);
+	else
+		ret_val = ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset,
+								    words,
+								    data);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_read_eeprom_82599 - Read EEPROM word using
+ *  fastest available method
+ *
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM
+ **/
+static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
+				   u16 offset, u16 *data)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	s32 ret_val = IXGBE_ERR_CONFIG;
+
+	/*
+	 * If EEPROM is detected and can be addressed using 14 bits,
+	 * use EERD otherwise use bit bang
+	 */
+	if ((eeprom->type == ixgbe_eeprom_spi) &&
+	    (offset <= IXGBE_EERD_MAX_ADDR))
+		ret_val = ixgbe_read_eerd_generic(hw, offset, data);
+	else
+		ret_val = ixgbe_read_eeprom_bit_bang_generic(hw, offset, data);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ **/
+static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data)
+{
+	u32 esdp;
+	s32 status;
+	s32 timeout = 200;
+
+	if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+		/* Acquire I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp |= IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+
+		while (timeout) {
+			esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+			if (esdp & IXGBE_ESDP_SDP1)
+				break;
+
+			msleep(5);
+			timeout--;
+		}
+
+		if (!timeout) {
+			hw_dbg(hw, "Driver can't access resource,"
+				 " acquiring I2C bus timeout.\n");
+			status = IXGBE_ERR_I2C;
+			goto release_i2c_access;
+		}
+	}
+
+	status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data);
+
+release_i2c_access:
+
+	if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+		/* Release I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp &= ~IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ **/
+static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data)
+{
+	u32 esdp;
+	s32 status;
+	s32 timeout = 200;
+
+	if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+		/* Acquire I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp |= IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+
+		while (timeout) {
+			esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+			if (esdp & IXGBE_ESDP_SDP1)
+				break;
+
+			msleep(5);
+			timeout--;
+		}
+
+		if (!timeout) {
+			hw_dbg(hw, "Driver can't access resource,"
+				 " acquiring I2C bus timeout.\n");
+			status = IXGBE_ERR_I2C;
+			goto release_i2c_access;
+		}
+	}
+
+	status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data);
+
+release_i2c_access:
+
+	if (hw->phy.qsfp_shared_i2c_bus == TRUE) {
+		/* Release I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp &= ~IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
new file mode 100644
index 00000000..02be92ab
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
@@ -0,0 +1,58 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_82599_H_
+#define _IXGBE_82599_H_
+
+s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
+				      ixgbe_link_speed *speed, bool *autoneg);
+enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw);
+void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+					  ixgbe_link_speed speed, bool autoneg,
+					  bool autoneg_wait_to_complete);
+s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+				    ixgbe_link_speed speed, bool autoneg,
+				    bool autoneg_wait_to_complete);
+s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+			       bool autoneg_wait_to_complete);
+s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			       bool autoneg, bool autoneg_wait_to_complete);
+s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw);
+void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw);
+s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw);
+s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val);
+s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val);
+s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw);
+s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw);
+s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw);
+u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval);
+bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw);
+#endif /* _IXGBE_82599_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
new file mode 100644
index 00000000..ef7ce629
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
@@ -0,0 +1,1157 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+
+/**
+ *  ixgbe_init_shared_code - Initialize the shared code
+ *  @hw: pointer to hardware structure
+ *
+ *  This will assign function pointers and assign the MAC type and PHY code.
+ *  Does not touch the hardware. This function must be called prior to any
+ *  other function in the shared code. The ixgbe_hw structure should be
+ *  memset to 0 prior to calling this function.  The following fields in
+ *  hw structure should be filled in prior to calling this function:
+ *  hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ *  subsystem_vendor_id, and revision_id
+ **/
+s32 ixgbe_init_shared_code(struct ixgbe_hw *hw)
+{
+	s32 status;
+
+	/*
+	 * Set the mac type
+	 */
+	ixgbe_set_mac_type(hw);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		status = ixgbe_init_ops_82598(hw);
+		break;
+	case ixgbe_mac_82599EB:
+		status = ixgbe_init_ops_82599(hw);
+		break;
+	case ixgbe_mac_X540:
+		status = ixgbe_init_ops_X540(hw);
+		break;
+	default:
+		status = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_set_mac_type - Sets MAC type
+ *  @hw: pointer to the HW structure
+ *
+ *  This function sets the mac type of the adapter based on the
+ *  vendor ID and device ID stored in the hw structure.
+ **/
+s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+
+	if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) {
+		switch (hw->device_id) {
+		case IXGBE_DEV_ID_82598:
+		case IXGBE_DEV_ID_82598_BX:
+		case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+		case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+		case IXGBE_DEV_ID_82598AT:
+		case IXGBE_DEV_ID_82598AT2:
+		case IXGBE_DEV_ID_82598EB_CX4:
+		case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
+		case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+		case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+		case IXGBE_DEV_ID_82598EB_XF_LR:
+		case IXGBE_DEV_ID_82598EB_SFP_LOM:
+			hw->mac.type = ixgbe_mac_82598EB;
+			break;
+		case IXGBE_DEV_ID_82599_KX4:
+		case IXGBE_DEV_ID_82599_KX4_MEZZ:
+		case IXGBE_DEV_ID_82599_XAUI_LOM:
+		case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+		case IXGBE_DEV_ID_82599_KR:
+		case IXGBE_DEV_ID_82599_SFP:
+		case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
+		case IXGBE_DEV_ID_82599_SFP_FCOE:
+		case IXGBE_DEV_ID_82599_SFP_EM:
+		case IXGBE_DEV_ID_82599_SFP_SF2:
+		case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+		case IXGBE_DEV_ID_82599EN_SFP:
+		case IXGBE_DEV_ID_82599_CX4:
+		case IXGBE_DEV_ID_82599_LS:
+		case IXGBE_DEV_ID_82599_T3_LOM:
+			hw->mac.type = ixgbe_mac_82599EB;
+			break;
+		case IXGBE_DEV_ID_X540T:
+			hw->mac.type = ixgbe_mac_X540;
+			break;
+		default:
+			ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
+			break;
+		}
+	} else {
+		ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
+	}
+
+	hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n",
+		  hw->mac.type, ret_val);
+	return ret_val;
+}
+
+/**
+ *  ixgbe_init_hw - Initialize the hardware
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the hardware by resetting and then starting the hardware
+ **/
+s32 ixgbe_init_hw(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_reset_hw - Performs a hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks and
+ *  clears all interrupts, performs a PHY reset, and performs a MAC reset
+ **/
+s32 ixgbe_reset_hw(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_start_hw - Prepares hardware for Rx/Tx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware by filling the bus info structure and media type,
+ *  clears all on chip counters, initializes receive address registers,
+ *  multicast table, VLAN filter table, calls routine to setup link and
+ *  flow control settings, and leaves transmit and receive units disabled
+ *  and uninitialized.
+ **/
+s32 ixgbe_start_hw(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_clear_hw_cntrs - Clear hardware counters
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears all hardware statistics counters by reading them from the hardware
+ *  Statistics counters are clear on read.
+ **/
+s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_media_type - Get media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ **/
+enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw),
+			       ixgbe_media_type_unknown);
+}
+
+/**
+ *  ixgbe_get_mac_addr - Get MAC address
+ *  @hw: pointer to hardware structure
+ *  @mac_addr: Adapter MAC address
+ *
+ *  Reads the adapter's MAC address from the first Receive Address Register
+ *  (RAR0) A reset of the adapter must have been performed prior to calling
+ *  this function in order for the MAC address to have been loaded from the
+ *  EEPROM into RAR0
+ **/
+s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr,
+			       (hw, mac_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_san_mac_addr - Get SAN MAC address
+ *  @hw: pointer to hardware structure
+ *  @san_mac_addr: SAN MAC address
+ *
+ *  Reads the SAN MAC address from the EEPROM, if it's available.  This is
+ *  per-port, so set_lan_id() must be called before reading the addresses.
+ **/
+s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr,
+			       (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_set_san_mac_addr - Write a SAN MAC address
+ *  @hw: pointer to hardware structure
+ *  @san_mac_addr: SAN MAC address
+ *
+ *  Writes A SAN MAC address to the EEPROM.
+ **/
+s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr,
+			       (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_device_caps - Get additional device capabilities
+ *  @hw: pointer to hardware structure
+ *  @device_caps: the EEPROM word for device capabilities
+ *
+ *  Reads the extra device capabilities from the EEPROM
+ **/
+s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_device_caps,
+			       (hw, device_caps), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_wwn_prefix - Get alternative WWNN/WWPN prefix from the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @wwnn_prefix: the alternative WWNN prefix
+ *  @wwpn_prefix: the alternative WWPN prefix
+ *
+ *  This function will read the EEPROM from the alternative SAN MAC address
+ *  block to check the support for the alternative WWNN/WWPN prefix support.
+ **/
+s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+			 u16 *wwpn_prefix)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_wwn_prefix,
+			       (hw, wwnn_prefix, wwpn_prefix),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_fcoe_boot_status -  Get FCOE boot status from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @bs: the fcoe boot status
+ *
+ *  This function will read the FCOE boot status from the iSCSI FCOE block
+ **/
+s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_fcoe_boot_status,
+			       (hw, bs),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_bus_info - Set PCI bus info
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
+ **/
+s32 ixgbe_get_bus_info(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_num_of_tx_queues - Get Tx queues
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the number of transmit queues for the given adapter.
+ **/
+u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw)
+{
+	return hw->mac.max_tx_queues;
+}
+
+/**
+ *  ixgbe_get_num_of_rx_queues - Get Rx queues
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the number of receive queues for the given adapter.
+ **/
+u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw)
+{
+	return hw->mac.max_rx_queues;
+}
+
+/**
+ *  ixgbe_stop_adapter - Disable Rx/Tx units
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
+ *  disables transmit and receive units. The adapter_stopped flag is used by
+ *  the shared code and drivers to determine if the adapter is in a stopped
+ *  state and should not touch the hardware.
+ **/
+s32 ixgbe_stop_adapter(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_read_pba_string - Reads part number string from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @pba_num: stores the part number string from the EEPROM
+ *  @pba_num_size: part number string buffer length
+ *
+ *  Reads the part number string from the EEPROM.
+ **/
+s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+	return ixgbe_read_pba_string_generic(hw, pba_num, pba_num_size);
+}
+
+/**
+ *  ixgbe_identify_phy - Get PHY type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines the physical layer module found on the current adapter.
+ **/
+s32 ixgbe_identify_phy(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		status = ixgbe_call_func(hw, hw->phy.ops.identify, (hw),
+					 IXGBE_NOT_IMPLEMENTED);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_reset_phy - Perform a PHY reset
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		if (ixgbe_identify_phy(hw) != 0)
+			status = IXGBE_ERR_PHY;
+	}
+
+	if (status == 0) {
+		status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw),
+					 IXGBE_NOT_IMPLEMENTED);
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_get_phy_firmware_version -
+ *  @hw: pointer to hardware structure
+ *  @firmware_version: pointer to firmware version
+ **/
+s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version)
+{
+	s32 status = 0;
+
+	status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version,
+				 (hw, firmware_version),
+				 IXGBE_NOT_IMPLEMENTED);
+	return status;
+}
+
+/**
+ *  ixgbe_read_phy_reg - Read PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit address of PHY register to read
+ *  @phy_data: Pointer to read data from PHY register
+ *
+ *  Reads a value from a specified PHY register
+ **/
+s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+		       u16 *phy_data)
+{
+	if (hw->phy.id == 0)
+		ixgbe_identify_phy(hw);
+
+	return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr,
+			       device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_write_phy_reg - Write PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @phy_data: Data to write to the PHY register
+ *
+ *  Writes a value to specified PHY register
+ **/
+s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+			u16 phy_data)
+{
+	if (hw->phy.id == 0)
+		ixgbe_identify_phy(hw);
+
+	return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr,
+			       device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_setup_phy_link - Restart PHY autoneg
+ *  @hw: pointer to hardware structure
+ *
+ *  Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_check_phy_link - Determine link and speed status
+ *  @hw: pointer to hardware structure
+ *
+ *  Reads a PHY register to determine if link is up and the current speed for
+ *  the PHY.
+ **/
+s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+			 bool *link_up)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed,
+			       link_up), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_setup_phy_link_speed - Set auto advertise
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *
+ *  Sets the auto advertised capabilities
+ **/
+s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			       bool autoneg,
+			       bool autoneg_wait_to_complete)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed,
+			       autoneg, autoneg_wait_to_complete),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_check_link - Get link and speed status
+ *  @hw: pointer to hardware structure
+ *
+ *  Reads the links register to determine if link is up and the current speed
+ **/
+s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+		     bool *link_up, bool link_up_wait_to_complete)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed,
+			       link_up, link_up_wait_to_complete),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_disable_tx_laser - Disable Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  If the driver needs to disable the laser on SFI optics.
+ **/
+void ixgbe_disable_tx_laser(struct ixgbe_hw *hw)
+{
+	if (hw->mac.ops.disable_tx_laser)
+		hw->mac.ops.disable_tx_laser(hw);
+}
+
+/**
+ *  ixgbe_enable_tx_laser - Enable Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  If the driver needs to enable the laser on SFI optics.
+ **/
+void ixgbe_enable_tx_laser(struct ixgbe_hw *hw)
+{
+	if (hw->mac.ops.enable_tx_laser)
+		hw->mac.ops.enable_tx_laser(hw);
+}
+
+/**
+ *  ixgbe_flap_tx_laser - flap Tx laser to start autotry process
+ *  @hw: pointer to hardware structure
+ *
+ *  When the driver changes the link speeds that it can support then
+ *  flap the tx laser to alert the link partner to start autotry
+ *  process on its end.
+ **/
+void ixgbe_flap_tx_laser(struct ixgbe_hw *hw)
+{
+	if (hw->mac.ops.flap_tx_laser)
+		hw->mac.ops.flap_tx_laser(hw);
+}
+
+/**
+ *  ixgbe_setup_link - Set link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *
+ *  Configures link settings.  Restarts the link.
+ *  Performs autonegotiation if needed.
+ **/
+s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+		     bool autoneg,
+		     bool autoneg_wait_to_complete)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw, speed,
+			       autoneg, autoneg_wait_to_complete),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_link_capabilities - Returns link capabilities
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines the link capabilities of the current configuration.
+ **/
+s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+				bool *autoneg)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw,
+			       speed, autoneg), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_led_on - Turn on LEDs
+ *  @hw: pointer to hardware structure
+ *  @index: led number to turn on
+ *
+ *  Turns on the software controllable LEDs.
+ **/
+s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_led_off - Turn off LEDs
+ *  @hw: pointer to hardware structure
+ *  @index: led number to turn off
+ *
+ *  Turns off the software controllable LEDs.
+ **/
+s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_blink_led_start - Blink LEDs
+ *  @hw: pointer to hardware structure
+ *  @index: led number to blink
+ *
+ *  Blink LED based on index.
+ **/
+s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_blink_led_stop - Stop blinking LEDs
+ *  @hw: pointer to hardware structure
+ *
+ *  Stop blinking LED based on index.
+ **/
+s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_init_eeprom_params - Initialize EEPROM parameters
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ *  ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+
+/**
+ *  ixgbe_write_eeprom - Write word to EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @data: 16 bit word to be written to the EEPROM
+ *
+ *  Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not
+ *  called after this function, the EEPROM will most likely contain an
+ *  invalid checksum.
+ **/
+s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_write_eeprom_buffer - Write word(s) to EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *  @words: number of words
+ *
+ *  Writes 16 bit word(s) to EEPROM. If ixgbe_eeprom_update_checksum is not
+ *  called after this function, the EEPROM will most likely contain an
+ *  invalid checksum.
+ **/
+s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.write_buffer,
+			       (hw, offset, words, data),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_read_eeprom - Read word from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @data: read 16 bit value from EEPROM
+ *
+ *  Reads 16 bit value from EEPROM
+ **/
+s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer - Read word(s) from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @data: read 16 bit word(s) from EEPROM
+ *  @words: number of words
+ *
+ *  Reads 16 bit word(s) from EEPROM
+ **/
+s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
+			     u16 words, u16 *data)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.read_buffer,
+			       (hw, offset, words, data),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_validate_eeprom_checksum - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum_val: calculated checksum
+ *
+ *  Performs checksum calculation and validates the EEPROM checksum
+ **/
+s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum,
+			       (hw, checksum_val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_eeprom_update_checksum - Updates the EEPROM checksum
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_insert_mac_addr - Find a RAR for this mac address
+ *  @hw: pointer to hardware structure
+ *  @addr: Address to put into receive address register
+ *  @vmdq: VMDq pool to assign
+ *
+ *  Puts an ethernet address into a receive address register, or
+ *  finds the rar that it is aleady in; adds to the pool list
+ **/
+s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr,
+			       (hw, addr, vmdq),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_set_rar - Set Rx address register
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *  @addr: Address to put into receive address register
+ *  @vmdq: VMDq "set"
+ *  @enable_addr: set flag that address is active
+ *
+ *  Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+		  u32 enable_addr)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq,
+			       enable_addr), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_clear_rar - Clear Rx address register
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *
+ *  Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_set_vmdq - Associate a VMDq index with a receive address
+ *  @hw: pointer to hardware structure
+ *  @rar: receive address register index to associate with VMDq index
+ *  @vmdq: VMDq set or pool index
+ **/
+s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq),
+			       IXGBE_NOT_IMPLEMENTED);
+
+}
+
+/**
+ *  ixgbe_set_vmdq_san_mac - Associate VMDq index 127 with a receive address
+ *  @hw: pointer to hardware structure
+ *  @vmdq: VMDq default pool index
+ **/
+s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_vmdq_san_mac,
+			       (hw, vmdq), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address
+ *  @hw: pointer to hardware structure
+ *  @rar: receive address register index to disassociate with VMDq index
+ *  @vmdq: VMDq set or pool index
+ **/
+s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_init_rx_addrs - Initializes receive address filters.
+ *  @hw: pointer to hardware structure
+ *
+ *  Places the MAC address in receive address register 0 and clears the rest
+ *  of the receive address registers. Clears the multicast table. Assumes
+ *  the receiver is in reset when the routine is called.
+ **/
+s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_num_rx_addrs - Returns the number of RAR entries.
+ *  @hw: pointer to hardware structure
+ **/
+u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw)
+{
+	return hw->mac.num_rar_entries;
+}
+
+/**
+ *  ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses
+ *  @hw: pointer to hardware structure
+ *  @addr_list: the list of new multicast addresses
+ *  @addr_count: number of addresses
+ *  @func: iterator function to walk the multicast address list
+ *
+ *  The given list replaces any existing list. Clears the secondary addrs from
+ *  receive address registers. Uses unused receive address registers for the
+ *  first secondary addresses, and falls back to promiscuous mode as needed.
+ **/
+s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
+			      u32 addr_count, ixgbe_mc_addr_itr func)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw,
+			       addr_list, addr_count, func),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses
+ *  @hw: pointer to hardware structure
+ *  @mc_addr_list: the list of new multicast addresses
+ *  @mc_addr_count: number of addresses
+ *  @func: iterator function to walk the multicast address list
+ *
+ *  The given list replaces any existing list. Clears the MC addrs from receive
+ *  address registers and the multicast table. Uses unused receive address
+ *  registers for the first multicast addresses, and hashes the rest into the
+ *  multicast table.
+ **/
+s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
+			      u32 mc_addr_count, ixgbe_mc_addr_itr func,
+			      bool clear)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw,
+			       mc_addr_list, mc_addr_count, func, clear),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_enable_mc - Enable multicast address in RAR
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_enable_mc(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_disable_mc - Disable multicast address in RAR
+ *  @hw: pointer to hardware structure
+ *
+ *  Disables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_disable_mc(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_clear_vfta - Clear VLAN filter table
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the VLAN filer table, and the VMDq index associated with the filter
+ **/
+s32 ixgbe_clear_vfta(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_set_vfta - Set VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFTA
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFTA
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind,
+			       vlan_on), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_set_vlvf - Set VLAN Pool Filter
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ *  @vfta_changed: pointer to boolean flag which indicates whether VFTA
+ *                 should be changed
+ *
+ *  Turn on/off specified bit in VLVF table.
+ **/
+s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on,
+		    bool *vfta_changed)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind,
+			       vlan_on, vfta_changed), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_fc_enable - Enable flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Configures the flow control settings based on SW configuration.
+ **/
+s32 ixgbe_fc_enable(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ * ixgbe_set_fw_drv_ver - Try to send the driver version number FW
+ * @hw: pointer to hardware structure
+ * @maj: driver major number to be sent to firmware
+ * @min: driver minor number to be sent to firmware
+ * @build: driver build number to be sent to firmware
+ * @ver: driver version number to be sent to firmware
+ **/
+s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
+			 u8 ver)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min,
+			       build, ver), IXGBE_NOT_IMPLEMENTED);
+}
+
+
+/**
+ *  ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
+ *  @hw: pointer to hardware structure
+ *
+ *  Updates the temperatures in mac.thermal_sensor_data
+ **/
+s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_thermal_sensor_data, (hw),
+				IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds
+ *  @hw: pointer to hardware structure
+ *
+ *  Inits the thermal sensor thresholds according to the NVM map
+ **/
+s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.init_thermal_sensor_thresh, (hw),
+				IXGBE_NOT_IMPLEMENTED);
+}
+/**
+ *  ixgbe_read_analog_reg8 - Reads 8 bit analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: analog register to read
+ *  @val: read value
+ *
+ *  Performs write operation to analog register specified.
+ **/
+s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg,
+			       val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_write_analog_reg8 - Writes 8 bit analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: analog register to write
+ *  @val: value to write
+ *
+ *  Performs write operation to Atlas analog register specified.
+ **/
+s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg,
+			       val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_init_uta_tables - Initializes Unicast Table Arrays.
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the Unicast Table Arrays to zero on device load.  This
+ *  is part of the Rx init addr execution path.
+ **/
+s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+			u8 *data)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset,
+			       dev_addr, data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_write_i2c_byte - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface
+ *  at a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+			 u8 data)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset,
+			       dev_addr, data), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to write
+ *  @eeprom_data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw,
+			   u8 byte_offset, u8 eeprom_data)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom,
+			       (hw, byte_offset, eeprom_data),
+			       IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to read
+ *  @eeprom_data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data)
+{
+	return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom,
+			      (hw, byte_offset, eeprom_data),
+			      IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_get_supported_physical_layer - Returns physical layer type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer,
+			       (hw), IXGBE_PHYSICAL_LAYER_UNKNOWN);
+}
+
+/**
+ *  ixgbe_enable_rx_dma - Enables Rx DMA unit, dependent on device specifics
+ *  @hw: pointer to hardware structure
+ *  @regval: bitfield to write to the Rx DMA register
+ *
+ *  Enables the Rx DMA unit of the device.
+ **/
+s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma,
+			       (hw, regval), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_disable_sec_rx_path - Stops the receive data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Stops the receive data path.
+ **/
+s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.disable_sec_rx_path,
+				(hw), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_enable_sec_rx_path - Enables the receive data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables the receive data path.
+ **/
+s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.enable_sec_rx_path,
+				(hw), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to acquire
+ *
+ *  Acquires the SWFW semaphore through SW_FW_SYNC register for the specified
+ *  function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
+{
+	return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync,
+			       (hw, mask), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
+ *  ixgbe_release_swfw_semaphore - Release SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to release
+ *
+ *  Releases the SWFW semaphore through SW_FW_SYNC register for the specified
+ *  function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
+{
+	if (hw->mac.ops.release_swfw_sync)
+		hw->mac.ops.release_swfw_sync(hw, mask);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
new file mode 100644
index 00000000..a6ab30d2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
@@ -0,0 +1,168 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_API_H_
+#define _IXGBE_API_H_
+
+#include "ixgbe_type.h"
+
+s32 ixgbe_init_shared_code(struct ixgbe_hw *hw);
+
+extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw);
+extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw);
+extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw);
+
+s32 ixgbe_set_mac_type(struct ixgbe_hw *hw);
+s32 ixgbe_init_hw(struct ixgbe_hw *hw);
+s32 ixgbe_reset_hw(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw(struct ixgbe_hw *hw);
+s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw);
+enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw);
+s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr);
+s32 ixgbe_get_bus_info(struct ixgbe_hw *hw);
+u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw);
+u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw);
+s32 ixgbe_stop_adapter(struct ixgbe_hw *hw);
+s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size);
+
+s32 ixgbe_identify_phy(struct ixgbe_hw *hw);
+s32 ixgbe_reset_phy(struct ixgbe_hw *hw);
+s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+		       u16 *phy_data);
+s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+			u16 phy_data);
+
+s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw);
+s32 ixgbe_check_phy_link(struct ixgbe_hw *hw,
+			 ixgbe_link_speed *speed,
+			 bool *link_up);
+s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw,
+			       ixgbe_link_speed speed,
+			       bool autoneg,
+			       bool autoneg_wait_to_complete);
+void ixgbe_disable_tx_laser(struct ixgbe_hw *hw);
+void ixgbe_enable_tx_laser(struct ixgbe_hw *hw);
+void ixgbe_flap_tx_laser(struct ixgbe_hw *hw);
+s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+		     bool autoneg, bool autoneg_wait_to_complete);
+s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+		     bool *link_up, bool link_up_wait_to_complete);
+s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+				bool *autoneg);
+s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index);
+
+s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw);
+s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
+			      u16 words, u16 *data);
+s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset,
+			     u16 words, u16 *data);
+
+s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val);
+s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw);
+
+s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
+s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+		  u32 enable_addr);
+s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq);
+s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw);
+u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw);
+s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
+			      u32 addr_count, ixgbe_mc_addr_itr func);
+s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
+			      u32 mc_addr_count, ixgbe_mc_addr_itr func,
+			      bool clear);
+void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq);
+s32 ixgbe_enable_mc(struct ixgbe_hw *hw);
+s32 ixgbe_disable_mc(struct ixgbe_hw *hw);
+s32 ixgbe_clear_vfta(struct ixgbe_hw *hw);
+s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan,
+		   u32 vind, bool vlan_on);
+s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+		   bool vlan_on, bool *vfta_changed);
+s32 ixgbe_fc_enable(struct ixgbe_hw *hw);
+s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build,
+			 u8 ver);
+s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw);
+s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw);
+void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr);
+s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw,
+				   u16 *firmware_version);
+s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val);
+s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val);
+s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw);
+s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data);
+u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval);
+s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw);
+s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw);
+s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_hash_dword input,
+					  union ixgbe_atr_hash_dword common,
+					  u8 queue);
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+				    union ixgbe_atr_input *input_mask);
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id, u8 queue);
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id);
+s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
+					union ixgbe_atr_input *input,
+					union ixgbe_atr_input *mask,
+					u16 soft_id,
+					u8 queue);
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+					  union ixgbe_atr_input *mask);
+u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
+				     union ixgbe_atr_hash_dword common);
+s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+			u8 *data);
+s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
+			 u8 data);
+s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data);
+s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps);
+s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
+s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+			 u16 *wwpn_prefix);
+s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs);
+
+#endif /* _IXGBE_API_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
new file mode 100644
index 00000000..93659ca0
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
@@ -0,0 +1,4082 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+#include "ixgbe_api.h"
+
+static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw);
+static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw);
+static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw);
+static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw);
+static void ixgbe_standby_eeprom(struct ixgbe_hw *hw);
+static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
+					u16 count);
+static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count);
+static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
+static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
+static void ixgbe_release_eeprom(struct ixgbe_hw *hw);
+
+static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr);
+static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
+					 u16 *san_mac_offset);
+static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					     u16 words, u16 *data);
+static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data);
+static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+						 u16 offset);
+
+/**
+ *  ixgbe_init_ops_generic - Inits function ptrs
+ *  @hw: pointer to the hardware structure
+ *
+ *  Initialize the function pointers.
+ **/
+s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+	/* EEPROM */
+	eeprom->ops.init_params = &ixgbe_init_eeprom_params_generic;
+	/* If EEPROM is valid (bit 8 = 1), use EERD otherwise use bit bang */
+	if (eec & IXGBE_EEC_PRES) {
+		eeprom->ops.read = &ixgbe_read_eerd_generic;
+		eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_generic;
+	} else {
+		eeprom->ops.read = &ixgbe_read_eeprom_bit_bang_generic;
+		eeprom->ops.read_buffer =
+				 &ixgbe_read_eeprom_buffer_bit_bang_generic;
+	}
+	eeprom->ops.write = &ixgbe_write_eeprom_generic;
+	eeprom->ops.write_buffer = &ixgbe_write_eeprom_buffer_bit_bang_generic;
+	eeprom->ops.validate_checksum =
+				      &ixgbe_validate_eeprom_checksum_generic;
+	eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_generic;
+	eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_generic;
+
+	/* MAC */
+	mac->ops.init_hw = &ixgbe_init_hw_generic;
+	mac->ops.reset_hw = NULL;
+	mac->ops.start_hw = &ixgbe_start_hw_generic;
+	mac->ops.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic;
+	mac->ops.get_media_type = NULL;
+	mac->ops.get_supported_physical_layer = NULL;
+	mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_generic;
+	mac->ops.get_mac_addr = &ixgbe_get_mac_addr_generic;
+	mac->ops.stop_adapter = &ixgbe_stop_adapter_generic;
+	mac->ops.get_bus_info = &ixgbe_get_bus_info_generic;
+	mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie;
+	mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync;
+	mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync;
+
+	/* LEDs */
+	mac->ops.led_on = &ixgbe_led_on_generic;
+	mac->ops.led_off = &ixgbe_led_off_generic;
+	mac->ops.blink_led_start = &ixgbe_blink_led_start_generic;
+	mac->ops.blink_led_stop = &ixgbe_blink_led_stop_generic;
+
+	/* RAR, Multicast, VLAN */
+	mac->ops.set_rar = &ixgbe_set_rar_generic;
+	mac->ops.clear_rar = &ixgbe_clear_rar_generic;
+	mac->ops.insert_mac_addr = NULL;
+	mac->ops.set_vmdq = NULL;
+	mac->ops.clear_vmdq = NULL;
+	mac->ops.init_rx_addrs = &ixgbe_init_rx_addrs_generic;
+	mac->ops.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic;
+	mac->ops.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic;
+	mac->ops.enable_mc = &ixgbe_enable_mc_generic;
+	mac->ops.disable_mc = &ixgbe_disable_mc_generic;
+	mac->ops.clear_vfta = NULL;
+	mac->ops.set_vfta = NULL;
+	mac->ops.set_vlvf = NULL;
+	mac->ops.init_uta_tables = NULL;
+
+	/* Flow Control */
+	mac->ops.fc_enable = &ixgbe_fc_enable_generic;
+
+	/* Link */
+	mac->ops.get_link_capabilities = NULL;
+	mac->ops.setup_link = NULL;
+	mac->ops.check_link = NULL;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow
+ *  control
+ *  @hw: pointer to hardware structure
+ *
+ *  There are several phys that do not support autoneg flow control. This
+ *  function check the device id to see if the associated phy supports
+ *  autoneg flow control.
+ **/
+static s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
+{
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X540T:
+		return 0;
+	case IXGBE_DEV_ID_82599_T3_LOM:
+		return 0;
+	default:
+		return IXGBE_ERR_FC_NOT_SUPPORTED;
+	}
+}
+
+/**
+ *  ixgbe_setup_fc - Set up flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Called at init time to set up flow control.
+ **/
+static s32 ixgbe_setup_fc(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 reg = 0, reg_bp = 0;
+	u16 reg_cu = 0;
+
+	/*
+	 * Validate the requested mode.  Strict IEEE mode does not allow
+	 * ixgbe_fc_rx_pause because it will cause us to fail at UNH.
+	 */
+	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+		hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+		ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+		goto out;
+	}
+
+	/*
+	 * 10gig parts do not have a word in the EEPROM to determine the
+	 * default flow control setting, so we explicitly set it to full.
+	 */
+	if (hw->fc.requested_mode == ixgbe_fc_default)
+		hw->fc.requested_mode = ixgbe_fc_full;
+
+	/*
+	 * Set up the 1G and 10G flow control advertisement registers so the
+	 * HW will be able to do fc autoneg once the cable is plugged in.  If
+	 * we link at 10G, the 1G advertisement is harmless and vice versa.
+	 */
+	switch (hw->phy.media_type) {
+	case ixgbe_media_type_fiber:
+	case ixgbe_media_type_backplane:
+		reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+		reg_bp = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+		break;
+	case ixgbe_media_type_copper:
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg_cu);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * The possible values of fc.requested_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *    we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_none:
+		/* Flow control completely disabled by software override. */
+		reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE);
+		if (hw->phy.media_type == ixgbe_media_type_backplane)
+			reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE |
+				    IXGBE_AUTOC_ASM_PAUSE);
+		else if (hw->phy.media_type == ixgbe_media_type_copper)
+			reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE);
+		break;
+	case ixgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		reg |= IXGBE_PCS1GANA_ASM_PAUSE;
+		reg &= ~IXGBE_PCS1GANA_SYM_PAUSE;
+		if (hw->phy.media_type == ixgbe_media_type_backplane) {
+			reg_bp |= IXGBE_AUTOC_ASM_PAUSE;
+			reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE;
+		} else if (hw->phy.media_type == ixgbe_media_type_copper) {
+			reg_cu |= IXGBE_TAF_ASM_PAUSE;
+			reg_cu &= ~IXGBE_TAF_SYM_PAUSE;
+		}
+		break;
+	case ixgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE, as such we fall
+		 * through to the fc_full statement.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE;
+		if (hw->phy.media_type == ixgbe_media_type_backplane)
+			reg_bp |= IXGBE_AUTOC_SYM_PAUSE |
+				  IXGBE_AUTOC_ASM_PAUSE;
+		else if (hw->phy.media_type == ixgbe_media_type_copper)
+			reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE;
+		break;
+	default:
+		hw_dbg(hw, "Flow control param set incorrectly\n");
+		ret_val = IXGBE_ERR_CONFIG;
+		goto out;
+		break;
+	}
+
+	if (hw->mac.type != ixgbe_mac_X540) {
+		/*
+		 * Enable auto-negotiation between the MAC & PHY;
+		 * the MAC will advertise clause 37 flow control.
+		 */
+		IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg);
+		reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+
+		/* Disable AN timeout */
+		if (hw->fc.strict_ieee)
+			reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN;
+
+		IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg);
+		hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg);
+	}
+
+	/*
+	 * AUTOC restart handles negotiation of 1G and 10G on backplane
+	 * and copper. There is no need to set the PCS1GCTL register.
+	 *
+	 */
+	if (hw->phy.media_type == ixgbe_media_type_backplane) {
+		reg_bp |= IXGBE_AUTOC_AN_RESTART;
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp);
+	} else if ((hw->phy.media_type == ixgbe_media_type_copper) &&
+		    (ixgbe_device_supports_autoneg_fc(hw) == 0)) {
+		hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg_cu);
+	}
+
+	hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg);
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_start_hw_generic - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware by filling the bus info structure and media type, clears
+ *  all on chip counters, initializes receive address registers, multicast
+ *  table, VLAN filter table, calls routine to set up link and flow control
+ *  settings, and leaves transmit and receive units disabled and uninitialized
+ **/
+s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
+{
+	s32 ret_val;
+	u32 ctrl_ext;
+
+	/* Set the media type */
+	hw->phy.media_type = hw->mac.ops.get_media_type(hw);
+
+	/* PHY ops initialization must be done in reset_hw() */
+
+	/* Clear the VLAN filter table */
+	hw->mac.ops.clear_vfta(hw);
+
+	/* Clear statistics registers */
+	hw->mac.ops.clear_hw_cntrs(hw);
+
+	/* Set No Snoop Disable */
+	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+	ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Setup flow control */
+	ret_val = ixgbe_setup_fc(hw);
+	if (ret_val != 0)
+		goto out;
+
+	/* Clear adapter stopped flag */
+	hw->adapter_stopped = false;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_start_hw_gen2 - Init sequence for common device family
+ *  @hw: pointer to hw structure
+ *
+ * Performs the init sequence common to the second generation
+ * of 10 GbE devices.
+ * Devices in the second generation:
+ *     82599
+ *     X540
+ **/
+s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u32 regval;
+
+	/* Clear the rate limiters */
+	for (i = 0; i < hw->mac.max_tx_queues; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0);
+	}
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Disable relaxed ordering */
+	for (i = 0; i < hw->mac.max_tx_queues; i++) {
+		regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
+		regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+		IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
+	}
+
+	for (i = 0; i < hw->mac.max_rx_queues; i++) {
+		regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+		regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
+			    IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
+		IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_hw_generic - Generic hardware initialization
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the hardware by resetting the hardware, filling the bus info
+ *  structure and media type, clears all on chip counters, initializes receive
+ *  address registers, multicast table, VLAN filter table, calls routine to set
+ *  up link and flow control settings, and leaves transmit and receive units
+ *  disabled and uninitialized
+ **/
+s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
+{
+	s32 status;
+
+	/* Reset the hardware */
+	status = hw->mac.ops.reset_hw(hw);
+
+	if (status == 0) {
+		/* Start the HW */
+		status = hw->mac.ops.start_hw(hw);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears all hardware statistics counters by reading them from the hardware
+ *  Statistics counters are clear on read.
+ **/
+s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
+{
+	u16 i = 0;
+
+	IXGBE_READ_REG(hw, IXGBE_CRCERRS);
+	IXGBE_READ_REG(hw, IXGBE_ILLERRC);
+	IXGBE_READ_REG(hw, IXGBE_ERRBC);
+	IXGBE_READ_REG(hw, IXGBE_MSPDC);
+	for (i = 0; i < 8; i++)
+		IXGBE_READ_REG(hw, IXGBE_MPC(i));
+
+	IXGBE_READ_REG(hw, IXGBE_MLFC);
+	IXGBE_READ_REG(hw, IXGBE_MRFC);
+	IXGBE_READ_REG(hw, IXGBE_RLEC);
+	IXGBE_READ_REG(hw, IXGBE_LXONTXC);
+	IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
+	if (hw->mac.type >= ixgbe_mac_82599EB) {
+		IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
+		IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
+	} else {
+		IXGBE_READ_REG(hw, IXGBE_LXONRXC);
+		IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
+	}
+
+	for (i = 0; i < 8; i++) {
+		IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
+		IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
+		if (hw->mac.type >= ixgbe_mac_82599EB) {
+			IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
+			IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
+		} else {
+			IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
+			IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
+		}
+	}
+	if (hw->mac.type >= ixgbe_mac_82599EB)
+		for (i = 0; i < 8; i++)
+			IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
+	IXGBE_READ_REG(hw, IXGBE_PRC64);
+	IXGBE_READ_REG(hw, IXGBE_PRC127);
+	IXGBE_READ_REG(hw, IXGBE_PRC255);
+	IXGBE_READ_REG(hw, IXGBE_PRC511);
+	IXGBE_READ_REG(hw, IXGBE_PRC1023);
+	IXGBE_READ_REG(hw, IXGBE_PRC1522);
+	IXGBE_READ_REG(hw, IXGBE_GPRC);
+	IXGBE_READ_REG(hw, IXGBE_BPRC);
+	IXGBE_READ_REG(hw, IXGBE_MPRC);
+	IXGBE_READ_REG(hw, IXGBE_GPTC);
+	IXGBE_READ_REG(hw, IXGBE_GORCL);
+	IXGBE_READ_REG(hw, IXGBE_GORCH);
+	IXGBE_READ_REG(hw, IXGBE_GOTCL);
+	IXGBE_READ_REG(hw, IXGBE_GOTCH);
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		for (i = 0; i < 8; i++)
+			IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+	IXGBE_READ_REG(hw, IXGBE_RUC);
+	IXGBE_READ_REG(hw, IXGBE_RFC);
+	IXGBE_READ_REG(hw, IXGBE_ROC);
+	IXGBE_READ_REG(hw, IXGBE_RJC);
+	IXGBE_READ_REG(hw, IXGBE_MNGPRC);
+	IXGBE_READ_REG(hw, IXGBE_MNGPDC);
+	IXGBE_READ_REG(hw, IXGBE_MNGPTC);
+	IXGBE_READ_REG(hw, IXGBE_TORL);
+	IXGBE_READ_REG(hw, IXGBE_TORH);
+	IXGBE_READ_REG(hw, IXGBE_TPR);
+	IXGBE_READ_REG(hw, IXGBE_TPT);
+	IXGBE_READ_REG(hw, IXGBE_PTC64);
+	IXGBE_READ_REG(hw, IXGBE_PTC127);
+	IXGBE_READ_REG(hw, IXGBE_PTC255);
+	IXGBE_READ_REG(hw, IXGBE_PTC511);
+	IXGBE_READ_REG(hw, IXGBE_PTC1023);
+	IXGBE_READ_REG(hw, IXGBE_PTC1522);
+	IXGBE_READ_REG(hw, IXGBE_MPTC);
+	IXGBE_READ_REG(hw, IXGBE_BPTC);
+	for (i = 0; i < 16; i++) {
+		IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+		IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+		if (hw->mac.type >= ixgbe_mac_82599EB) {
+			IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBRC_H(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC_H(i));
+			IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
+		} else {
+			IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+		}
+	}
+
+	if (hw->mac.type == ixgbe_mac_X540) {
+		if (hw->phy.id == 0)
+			ixgbe_identify_phy(hw);
+		hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL,
+				     IXGBE_MDIO_PCS_DEV_TYPE, &i);
+		hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH,
+				     IXGBE_MDIO_PCS_DEV_TYPE, &i);
+		hw->phy.ops.read_reg(hw, IXGBE_LDPCECL,
+				     IXGBE_MDIO_PCS_DEV_TYPE, &i);
+		hw->phy.ops.read_reg(hw, IXGBE_LDPCECH,
+				     IXGBE_MDIO_PCS_DEV_TYPE, &i);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_pba_string_generic - Reads part number string from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @pba_num: stores the part number string from the EEPROM
+ *  @pba_num_size: part number string buffer length
+ *
+ *  Reads the part number string from the EEPROM.
+ **/
+s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+				  u32 pba_num_size)
+{
+	s32 ret_val;
+	u16 data;
+	u16 pba_ptr;
+	u16 offset;
+	u16 length;
+
+	if (pba_num == NULL) {
+		hw_dbg(hw, "PBA string buffer was null\n");
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data);
+	if (ret_val) {
+		hw_dbg(hw, "NVM Read Error\n");
+		return ret_val;
+	}
+
+	ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr);
+	if (ret_val) {
+		hw_dbg(hw, "NVM Read Error\n");
+		return ret_val;
+	}
+
+	/*
+	 * if data is not ptr guard the PBA must be in legacy format which
+	 * means pba_ptr is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (data != IXGBE_PBANUM_PTR_GUARD) {
+		hw_dbg(hw, "NVM PBA number is not stored as string\n");
+
+		/* we will need 11 characters to store the PBA */
+		if (pba_num_size < 11) {
+			hw_dbg(hw, "PBA string buffer too small\n");
+			return IXGBE_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pba_ptr */
+		pba_num[0] = (data >> 12) & 0xF;
+		pba_num[1] = (data >> 8) & 0xF;
+		pba_num[2] = (data >> 4) & 0xF;
+		pba_num[3] = data & 0xF;
+		pba_num[4] = (pba_ptr >> 12) & 0xF;
+		pba_num[5] = (pba_ptr >> 8) & 0xF;
+		pba_num[6] = '-';
+		pba_num[7] = 0;
+		pba_num[8] = (pba_ptr >> 4) & 0xF;
+		pba_num[9] = pba_ptr & 0xF;
+
+		/* put a null character on the end of our string */
+		pba_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (pba_num[offset] < 0xA)
+				pba_num[offset] += '0';
+			else if (pba_num[offset] < 0x10)
+				pba_num[offset] += 'A' - 0xA;
+		}
+
+		return 0;
+	}
+
+	ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length);
+	if (ret_val) {
+		hw_dbg(hw, "NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		hw_dbg(hw, "NVM PBA number section invalid length\n");
+		return IXGBE_ERR_PBA_SECTION;
+	}
+
+	/* check if pba_num buffer is big enough */
+	if (pba_num_size  < (((u32)length * 2) - 1)) {
+		hw_dbg(hw, "PBA string buffer too small\n");
+		return IXGBE_ERR_NO_SPACE;
+	}
+
+	/* trim pba length from start of string */
+	pba_ptr++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data);
+		if (ret_val) {
+			hw_dbg(hw, "NVM Read Error\n");
+			return ret_val;
+		}
+		pba_num[offset * 2] = (u8)(data >> 8);
+		pba_num[(offset * 2) + 1] = (u8)(data & 0xFF);
+	}
+	pba_num[offset * 2] = '\0';
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_mac_addr_generic - Generic get MAC address
+ *  @hw: pointer to hardware structure
+ *  @mac_addr: Adapter MAC address
+ *
+ *  Reads the adapter's MAC address from first Receive Address Register (RAR0)
+ *  A reset of the adapter must be performed prior to calling this function
+ *  in order for the MAC address to have been loaded from the EEPROM into RAR0
+ **/
+s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0));
+	rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0));
+
+	for (i = 0; i < 4; i++)
+		mac_addr[i] = (u8)(rar_low >> (i*8));
+
+	for (i = 0; i < 2; i++)
+		mac_addr[i+4] = (u8)(rar_high >> (i*8));
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_bus_info_generic - Generic set PCI bus info
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
+ **/
+s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u16 link_status;
+
+	hw->bus.type = ixgbe_bus_type_pci_express;
+
+	/* Get the negotiated link width and speed from PCI config space */
+	link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
+
+	switch (link_status & IXGBE_PCI_LINK_WIDTH) {
+	case IXGBE_PCI_LINK_WIDTH_1:
+		hw->bus.width = ixgbe_bus_width_pcie_x1;
+		break;
+	case IXGBE_PCI_LINK_WIDTH_2:
+		hw->bus.width = ixgbe_bus_width_pcie_x2;
+		break;
+	case IXGBE_PCI_LINK_WIDTH_4:
+		hw->bus.width = ixgbe_bus_width_pcie_x4;
+		break;
+	case IXGBE_PCI_LINK_WIDTH_8:
+		hw->bus.width = ixgbe_bus_width_pcie_x8;
+		break;
+	default:
+		hw->bus.width = ixgbe_bus_width_unknown;
+		break;
+	}
+
+	switch (link_status & IXGBE_PCI_LINK_SPEED) {
+	case IXGBE_PCI_LINK_SPEED_2500:
+		hw->bus.speed = ixgbe_bus_speed_2500;
+		break;
+	case IXGBE_PCI_LINK_SPEED_5000:
+		hw->bus.speed = ixgbe_bus_speed_5000;
+		break;
+	case IXGBE_PCI_LINK_SPEED_8000:
+		hw->bus.speed = ixgbe_bus_speed_8000;
+		break;
+	default:
+		hw->bus.speed = ixgbe_bus_speed_unknown;
+		break;
+	}
+
+	mac->ops.set_lan_id(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines the LAN function id by reading memory-mapped registers
+ *  and swaps the port value if requested.
+ **/
+void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
+{
+	struct ixgbe_bus_info *bus = &hw->bus;
+	u32 reg;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_STATUS);
+	bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT;
+	bus->lan_id = bus->func;
+
+	/* check for a port swap */
+	reg = IXGBE_READ_REG(hw, IXGBE_FACTPS);
+	if (reg & IXGBE_FACTPS_LFS)
+		bus->func ^= 0x1;
+}
+
+/**
+ *  ixgbe_stop_adapter_generic - Generic stop Tx/Rx units
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
+ *  disables transmit and receive units. The adapter_stopped flag is used by
+ *  the shared code and drivers to determine if the adapter is in a stopped
+ *  state and should not touch the hardware.
+ **/
+s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
+{
+	u32 reg_val;
+	u16 i;
+
+	/*
+	 * Set the adapter_stopped flag so other driver functions stop touching
+	 * the hardware
+	 */
+	hw->adapter_stopped = true;
+
+	/* Disable the receive unit */
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, 0);
+
+	/* Clear interrupt mask to stop interrupts from being generated */
+	IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
+
+	/* Clear any pending interrupts, flush previous writes */
+	IXGBE_READ_REG(hw, IXGBE_EICR);
+
+	/* Disable the transmit unit.  Each queue must be disabled. */
+	for (i = 0; i < hw->mac.max_tx_queues; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH);
+
+	/* Disable the receive unit by stopping each queue */
+	for (i = 0; i < hw->mac.max_rx_queues; i++) {
+		reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+		reg_val &= ~IXGBE_RXDCTL_ENABLE;
+		reg_val |= IXGBE_RXDCTL_SWFLSH;
+		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val);
+	}
+
+	/* flush all queues disables */
+	IXGBE_WRITE_FLUSH(hw);
+	msleep(2);
+
+	/*
+	 * Prevent the PCI-E bus from from hanging by disabling PCI-E master
+	 * access and verify no pending requests
+	 */
+	return ixgbe_disable_pcie_master(hw);
+}
+
+/**
+ *  ixgbe_led_on_generic - Turns on the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to turn on
+ **/
+s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	/* To turn on the LED, set mode to ON. */
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_led_off_generic - Turns off the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to turn off
+ **/
+s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	/* To turn off the LED, set mode to OFF. */
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_eeprom_params_generic - Initialize EEPROM params
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ *  ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	u32 eec;
+	u16 eeprom_size;
+
+	if (eeprom->type == ixgbe_eeprom_uninitialized) {
+		eeprom->type = ixgbe_eeprom_none;
+		/* Set default semaphore delay to 10ms which is a well
+		 * tested value */
+		eeprom->semaphore_delay = 10;
+		/* Clear EEPROM page size, it will be initialized as needed */
+		eeprom->word_page_size = 0;
+
+		/*
+		 * Check for EEPROM present first.
+		 * If not present leave as none
+		 */
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+		if (eec & IXGBE_EEC_PRES) {
+			eeprom->type = ixgbe_eeprom_spi;
+
+			/*
+			 * SPI EEPROM is assumed here.  This code would need to
+			 * change if a future EEPROM is not SPI.
+			 */
+			eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+					    IXGBE_EEC_SIZE_SHIFT);
+			eeprom->word_size = 1 << (eeprom_size +
+					     IXGBE_EEPROM_WORD_SIZE_SHIFT);
+		}
+
+		if (eec & IXGBE_EEC_ADDR_SIZE)
+			eeprom->address_bits = 16;
+		else
+			eeprom->address_bits = 8;
+		hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: "
+			  "%d\n", eeprom->type, eeprom->word_size,
+			  eeprom->address_bits);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to write
+ *  @words: number of word(s)
+ *  @data: 16 bit word(s) to write to EEPROM
+ *
+ *  Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					       u16 words, u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0) {
+		status = IXGBE_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	if (offset + words > hw->eeprom.word_size) {
+		status = IXGBE_ERR_EEPROM;
+		goto out;
+	}
+
+	/*
+	 * The EEPROM page size cannot be queried from the chip. We do lazy
+	 * initialization. It is worth to do that when we write large buffer.
+	 */
+	if ((hw->eeprom.word_page_size == 0) &&
+	    (words > IXGBE_EEPROM_PAGE_SIZE_MAX))
+		ixgbe_detect_eeprom_page_size_generic(hw, offset);
+
+	/*
+	 * We cannot hold synchronization semaphores for too long
+	 * to avoid other entity starvation. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
+		count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
+			IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
+		status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i,
+							    count, &data[i]);
+
+		if (status != 0)
+			break;
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of word(s)
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  If ixgbe_eeprom_update_checksum is not called after this function, the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data)
+{
+	s32 status;
+	u16 word;
+	u16 page_size;
+	u16 i;
+	u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI;
+
+	/* Prepare the EEPROM for writing  */
+	status = ixgbe_acquire_eeprom(hw);
+
+	if (status == 0) {
+		if (ixgbe_ready_eeprom(hw) != 0) {
+			ixgbe_release_eeprom(hw);
+			status = IXGBE_ERR_EEPROM;
+		}
+	}
+
+	if (status == 0) {
+		for (i = 0; i < words; i++) {
+			ixgbe_standby_eeprom(hw);
+
+			/*  Send the WRITE ENABLE command (8 bit opcode )  */
+			ixgbe_shift_out_eeprom_bits(hw,
+						   IXGBE_EEPROM_WREN_OPCODE_SPI,
+						   IXGBE_EEPROM_OPCODE_BITS);
+
+			ixgbe_standby_eeprom(hw);
+
+			/*
+			 * Some SPI eeproms use the 8th address bit embedded
+			 * in the opcode
+			 */
+			if ((hw->eeprom.address_bits == 8) &&
+			    ((offset + i) >= 128))
+				write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
+
+			/* Send the Write command (8-bit opcode + addr) */
+			ixgbe_shift_out_eeprom_bits(hw, write_opcode,
+						    IXGBE_EEPROM_OPCODE_BITS);
+			ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
+						    hw->eeprom.address_bits);
+
+			page_size = hw->eeprom.word_page_size;
+
+			/* Send the data in burst via SPI*/
+			do {
+				word = data[i];
+				word = (word >> 8) | (word << 8);
+				ixgbe_shift_out_eeprom_bits(hw, word, 16);
+
+				if (page_size == 0)
+					break;
+
+				/* do not wrap around page */
+				if (((offset + i) & (page_size - 1)) ==
+				    (page_size - 1))
+					break;
+			} while (++i < words);
+
+			ixgbe_standby_eeprom(hw);
+			msleep(10);
+		}
+		/* Done with writing - release the EEPROM */
+		ixgbe_release_eeprom(hw);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @data: 16 bit word to be written to the EEPROM
+ *
+ *  If ixgbe_eeprom_update_checksum is not called after this function, the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	s32 status;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (offset >= hw->eeprom.word_size) {
+		status = IXGBE_ERR_EEPROM;
+		goto out;
+	}
+
+	status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data);
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @data: read 16 bit words(s) from EEPROM
+ *  @words: number of word(s)
+ *
+ *  Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0) {
+		status = IXGBE_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	if (offset + words > hw->eeprom.word_size) {
+		status = IXGBE_ERR_EEPROM;
+		goto out;
+	}
+
+	/*
+	 * We cannot hold synchronization semaphores for too long
+	 * to avoid other entity starvation. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
+		count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
+			IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
+
+		status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i,
+							   count, &data[i]);
+
+		if (status != 0)
+			break;
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @words: number of word(s)
+ *  @data: read 16 bit word(s) from EEPROM
+ *
+ *  Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					     u16 words, u16 *data)
+{
+	s32 status;
+	u16 word_in;
+	u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI;
+	u16 i;
+
+	/* Prepare the EEPROM for reading  */
+	status = ixgbe_acquire_eeprom(hw);
+
+	if (status == 0) {
+		if (ixgbe_ready_eeprom(hw) != 0) {
+			ixgbe_release_eeprom(hw);
+			status = IXGBE_ERR_EEPROM;
+		}
+	}
+
+	if (status == 0) {
+		for (i = 0; i < words; i++) {
+			ixgbe_standby_eeprom(hw);
+			/*
+			 * Some SPI eeproms use the 8th address bit embedded
+			 * in the opcode
+			 */
+			if ((hw->eeprom.address_bits == 8) &&
+			    ((offset + i) >= 128))
+				read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
+
+			/* Send the READ command (opcode + addr) */
+			ixgbe_shift_out_eeprom_bits(hw, read_opcode,
+						    IXGBE_EEPROM_OPCODE_BITS);
+			ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
+						    hw->eeprom.address_bits);
+
+			/* Read the data. */
+			word_in = ixgbe_shift_in_eeprom_bits(hw, 16);
+			data[i] = (word_in >> 8) | (word_in << 8);
+		}
+
+		/* End this read operation */
+		ixgbe_release_eeprom(hw);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @data: read 16 bit value from EEPROM
+ *
+ *  Reads 16 bit value from EEPROM through bit-bang method
+ **/
+s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+				       u16 *data)
+{
+	s32 status;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (offset >= hw->eeprom.word_size) {
+		status = IXGBE_ERR_EEPROM;
+		goto out;
+	}
+
+	status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of word(s)
+ *  @data: 16 bit word(s) from the EEPROM
+ *
+ *  Reads a 16 bit word(s) from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				   u16 words, u16 *data)
+{
+	u32 eerd;
+	s32 status = 0;
+	u32 i;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0) {
+		status = IXGBE_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	if (offset >= hw->eeprom.word_size) {
+		status = IXGBE_ERR_EEPROM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) +
+		       IXGBE_EEPROM_RW_REG_START;
+
+		IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd);
+		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ);
+
+		if (status == 0) {
+			data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >>
+				   IXGBE_EEPROM_RW_REG_DATA);
+		} else {
+			hw_dbg(hw, "Eeprom read timed out\n");
+			goto out;
+		}
+	}
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be used as a scratch pad
+ *
+ *  Discover EEPROM page size by writing marching data at given offset.
+ *  This function is called only when we are writing a new large buffer
+ *  at given offset so the data would be overwritten anyway.
+ **/
+static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+						 u16 offset)
+{
+	u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX];
+	s32 status = 0;
+	u16 i;
+
+	for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++)
+		data[i] = i;
+
+	hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX;
+	status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset,
+					     IXGBE_EEPROM_PAGE_SIZE_MAX, data);
+	hw->eeprom.word_page_size = 0;
+	if (status != 0)
+		goto out;
+
+	status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
+	if (status != 0)
+		goto out;
+
+	/*
+	 * When writing in burst more than the actual page size
+	 * EEPROM address wraps around current page.
+	 */
+	hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0];
+
+	hw_dbg(hw, "Detected EEPROM page size = %d words.",
+		  hw->eeprom.word_page_size);
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_read_eerd_generic - Read EEPROM word using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+	return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data);
+}
+
+/**
+ *  ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @words: number of word(s)
+ *  @data: word(s) write to the EEPROM
+ *
+ *  Write a 16 bit word(s) to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				    u16 words, u16 *data)
+{
+	u32 eewr;
+	s32 status = 0;
+	u16 i;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0) {
+		status = IXGBE_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	if (offset >= hw->eeprom.word_size) {
+		status = IXGBE_ERR_EEPROM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) |
+			(data[i] << IXGBE_EEPROM_RW_REG_DATA) |
+			IXGBE_EEPROM_RW_REG_START;
+
+		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
+		if (status != 0) {
+			hw_dbg(hw, "Eeprom write EEWR timed out\n");
+			goto out;
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr);
+
+		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
+		if (status != 0) {
+			hw_dbg(hw, "Eeprom write EEWR timed out\n");
+			goto out;
+		}
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_write_eewr_generic - Write EEPROM word using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @data: word write to the EEPROM
+ *
+ *  Write a 16 bit word to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data);
+}
+
+/**
+ *  ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status
+ *  @hw: pointer to hardware structure
+ *  @ee_reg: EEPROM flag for polling
+ *
+ *  Polls the status bit (bit 1) of the EERD or EEWR to determine when the
+ *  read or write is done respectively.
+ **/
+s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
+{
+	u32 i;
+	u32 reg;
+	s32 status = IXGBE_ERR_EEPROM;
+
+	for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) {
+		if (ee_reg == IXGBE_NVM_POLL_READ)
+			reg = IXGBE_READ_REG(hw, IXGBE_EERD);
+		else
+			reg = IXGBE_READ_REG(hw, IXGBE_EEWR);
+
+		if (reg & IXGBE_EEPROM_RW_REG_DONE) {
+			status = 0;
+			break;
+		}
+		udelay(5);
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *
+ *  Prepares EEPROM for access using bit-bang method. This function should
+ *  be called before issuing a command to the EEPROM.
+ **/
+static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 eec;
+	u32 i;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)
+	    != 0)
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	if (status == 0) {
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+		/* Request EEPROM Access */
+		eec |= IXGBE_EEC_REQ;
+		IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+
+		for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) {
+			eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+			if (eec & IXGBE_EEC_GNT)
+				break;
+			udelay(5);
+		}
+
+		/* Release if grant not acquired */
+		if (!(eec & IXGBE_EEC_GNT)) {
+			eec &= ~IXGBE_EEC_REQ;
+			IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+			hw_dbg(hw, "Could not acquire EEPROM grant\n");
+
+			hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+			status = IXGBE_ERR_EEPROM;
+		}
+
+		/* Setup EEPROM for Read/Write */
+		if (status == 0) {
+			/* Clear CS and SK */
+			eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK);
+			IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+			IXGBE_WRITE_FLUSH(hw);
+			udelay(1);
+		}
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_get_eeprom_semaphore - Get hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the hardware semaphores so EEPROM access can occur for bit-bang method
+ **/
+static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_EEPROM;
+	u32 timeout = 2000;
+	u32 i;
+	u32 swsm;
+
+	/* Get SMBI software semaphore between device drivers first */
+	for (i = 0; i < timeout; i++) {
+		/*
+		 * If the SMBI bit is 0 when we read it, then the bit will be
+		 * set and we have the semaphore
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+		if (!(swsm & IXGBE_SWSM_SMBI)) {
+			status = 0;
+			break;
+		}
+		udelay(50);
+	}
+
+	if (i == timeout) {
+		hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore "
+			 "not granted.\n");
+		/*
+		 * this release is particularly important because our attempts
+		 * above to get the semaphore may have succeeded, and if there
+		 * was a timeout, we should unconditionally clear the semaphore
+		 * bits to free the driver to make progress
+		 */
+		ixgbe_release_eeprom_semaphore(hw);
+
+		udelay(50);
+		/*
+		 * one last try
+		 * If the SMBI bit is 0 when we read it, then the bit will be
+		 * set and we have the semaphore
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+		if (!(swsm & IXGBE_SWSM_SMBI))
+			status = 0;
+	}
+
+	/* Now get the semaphore between SW/FW through the SWESMBI bit */
+	if (status == 0) {
+		for (i = 0; i < timeout; i++) {
+			swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+
+			/* Set the SW EEPROM semaphore bit to request access */
+			swsm |= IXGBE_SWSM_SWESMBI;
+			IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+
+			/*
+			 * If we set the bit successfully then we got the
+			 * semaphore.
+			 */
+			swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+			if (swsm & IXGBE_SWSM_SWESMBI)
+				break;
+
+			udelay(50);
+		}
+
+		/*
+		 * Release semaphores and return error if SW EEPROM semaphore
+		 * was not granted because we don't have access to the EEPROM
+		 */
+		if (i >= timeout) {
+			hw_dbg(hw, "SWESMBI Software EEPROM semaphore "
+				 "not granted.\n");
+			ixgbe_release_eeprom_semaphore(hw);
+			status = IXGBE_ERR_EEPROM;
+		}
+	} else {
+		hw_dbg(hw, "Software semaphore SMBI between device drivers "
+			 "not granted.\n");
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_release_eeprom_semaphore - Release hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  This function clears hardware semaphore bits.
+ **/
+static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw)
+{
+	u32 swsm;
+
+	swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+
+	/* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */
+	swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI);
+	IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ *  ixgbe_ready_eeprom - Polls for EEPROM ready
+ *  @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u16 i;
+	u8 spi_stat_reg;
+
+	/*
+	 * Read "Status Register" repeatedly until the LSB is cleared.  The
+	 * EEPROM will signal that the command has been completed by clearing
+	 * bit 0 of the internal status register.  If it's not cleared within
+	 * 5 milliseconds, then error out.
+	 */
+	for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) {
+		ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI,
+					    IXGBE_EEPROM_OPCODE_BITS);
+		spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8);
+		if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI))
+			break;
+
+		udelay(5);
+		ixgbe_standby_eeprom(hw);
+	};
+
+	/*
+	 * On some parts, SPI write time could vary from 0-20mSec on 3.3V
+	 * devices (and only 0-5mSec on 5V devices)
+	 */
+	if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) {
+		hw_dbg(hw, "SPI EEPROM Status error\n");
+		status = IXGBE_ERR_EEPROM;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_standby_eeprom - Returns EEPROM to a "standby" state
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_standby_eeprom(struct ixgbe_hw *hw)
+{
+	u32 eec;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+	/* Toggle CS to flush commands */
+	eec |= IXGBE_EEC_CS;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+	eec &= ~IXGBE_EEC_CS;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+}
+
+/**
+ *  ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM.
+ *  @hw: pointer to hardware structure
+ *  @data: data to send to the EEPROM
+ *  @count: number of bits to shift out
+ **/
+static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
+					u16 count)
+{
+	u32 eec;
+	u32 mask;
+	u32 i;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+	/*
+	 * Mask is used to shift "count" bits of "data" out to the EEPROM
+	 * one bit at a time.  Determine the starting bit based on count
+	 */
+	mask = 0x01 << (count - 1);
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * A "1" is shifted out to the EEPROM by setting bit "DI" to a
+		 * "1", and then raising and then lowering the clock (the SK
+		 * bit controls the clock input to the EEPROM).  A "0" is
+		 * shifted out to the EEPROM by setting "DI" to "0" and then
+		 * raising and then lowering the clock.
+		 */
+		if (data & mask)
+			eec |= IXGBE_EEC_DI;
+		else
+			eec &= ~IXGBE_EEC_DI;
+
+		IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+		IXGBE_WRITE_FLUSH(hw);
+
+		udelay(1);
+
+		ixgbe_raise_eeprom_clk(hw, &eec);
+		ixgbe_lower_eeprom_clk(hw, &eec);
+
+		/*
+		 * Shift mask to signify next bit of data to shift in to the
+		 * EEPROM
+		 */
+		mask = mask >> 1;
+	};
+
+	/* We leave the "DI" bit set to "0" when we leave this routine. */
+	eec &= ~IXGBE_EEC_DI;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ *  ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM
+ *  @hw: pointer to hardware structure
+ **/
+static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count)
+{
+	u32 eec;
+	u32 i;
+	u16 data = 0;
+
+	/*
+	 * In order to read a register from the EEPROM, we need to shift
+	 * 'count' bits in from the EEPROM. Bits are "shifted in" by raising
+	 * the clock input to the EEPROM (setting the SK bit), and then reading
+	 * the value of the "DO" bit.  During this "shifting in" process the
+	 * "DI" bit should always be clear.
+	 */
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+	eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI);
+
+	for (i = 0; i < count; i++) {
+		data = data << 1;
+		ixgbe_raise_eeprom_clk(hw, &eec);
+
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+		eec &= ~(IXGBE_EEC_DI);
+		if (eec & IXGBE_EEC_DO)
+			data |= 1;
+
+		ixgbe_lower_eeprom_clk(hw, &eec);
+	}
+
+	return data;
+}
+
+/**
+ *  ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input.
+ *  @hw: pointer to hardware structure
+ *  @eec: EEC register's current value
+ **/
+static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
+{
+	/*
+	 * Raise the clock input to the EEPROM
+	 * (setting the SK bit), then delay
+	 */
+	*eec = *eec | IXGBE_EEC_SK;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+}
+
+/**
+ *  ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input.
+ *  @hw: pointer to hardware structure
+ *  @eecd: EECD's current value
+ **/
+static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
+{
+	/*
+	 * Lower the clock input to the EEPROM (clearing the SK bit), then
+	 * delay
+	 */
+	*eec = *eec & ~IXGBE_EEC_SK;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+}
+
+/**
+ *  ixgbe_release_eeprom - Release EEPROM, release semaphores
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_release_eeprom(struct ixgbe_hw *hw)
+{
+	u32 eec;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+	eec |= IXGBE_EEC_CS;  /* Pull CS high */
+	eec &= ~IXGBE_EEC_SK; /* Lower SCK */
+
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+	IXGBE_WRITE_FLUSH(hw);
+
+	udelay(1);
+
+	/* Stop requesting EEPROM access */
+	eec &= ~IXGBE_EEC_REQ;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, eec);
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+	/* Delay before attempt to obtain semaphore again to allow FW access */
+	msleep(hw->eeprom.semaphore_delay);
+}
+
+/**
+ *  ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum
+ *  @hw: pointer to hardware structure
+ **/
+u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
+{
+	u16 i;
+	u16 j;
+	u16 checksum = 0;
+	u16 length = 0;
+	u16 pointer = 0;
+	u16 word = 0;
+
+	/* Include 0x0-0x3F in the checksum */
+	for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
+		if (hw->eeprom.ops.read(hw, i, &word) != 0) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			break;
+		}
+		checksum += word;
+	}
+
+	/* Include all data from pointers except for the fw pointer */
+	for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
+		hw->eeprom.ops.read(hw, i, &pointer);
+
+		/* Make sure the pointer seems valid */
+		if (pointer != 0xFFFF && pointer != 0) {
+			hw->eeprom.ops.read(hw, pointer, &length);
+
+			if (length != 0xFFFF && length != 0) {
+				for (j = pointer+1; j <= pointer+length; j++) {
+					hw->eeprom.ops.read(hw, j, &word);
+					checksum += word;
+				}
+			}
+		}
+	}
+
+	checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+	return checksum;
+}
+
+/**
+ *  ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum_val: calculated checksum
+ *
+ *  Performs checksum calculation and validates the EEPROM checksum.  If the
+ *  caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+					   u16 *checksum_val)
+{
+	s32 status;
+	u16 checksum;
+	u16 read_checksum = 0;
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+	if (status == 0) {
+		checksum = hw->eeprom.ops.calc_checksum(hw);
+
+		hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum);
+
+		/*
+		 * Verify read checksum from EEPROM is the same as
+		 * calculated checksum
+		 */
+		if (read_checksum != checksum)
+			status = IXGBE_ERR_EEPROM_CHECKSUM;
+
+		/* If the user cares, return the calculated checksum */
+		if (checksum_val)
+			*checksum_val = checksum;
+	} else {
+		hw_dbg(hw, "EEPROM read failed\n");
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 checksum;
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+	if (status == 0) {
+		checksum = hw->eeprom.ops.calc_checksum(hw);
+		status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM,
+					      checksum);
+	} else {
+		hw_dbg(hw, "EEPROM read failed\n");
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_validate_mac_addr - Validate MAC address
+ *  @mac_addr: pointer to MAC address.
+ *
+ *  Tests a MAC address to ensure it is a valid Individual Address
+ **/
+s32 ixgbe_validate_mac_addr(u8 *mac_addr)
+{
+	s32 status = 0;
+
+	/* Make sure it is not a multicast address */
+	if (IXGBE_IS_MULTICAST(mac_addr)) {
+		hw_dbg(hw, "MAC address is multicast\n");
+		status = IXGBE_ERR_INVALID_MAC_ADDR;
+	/* Not a broadcast address */
+	} else if (IXGBE_IS_BROADCAST(mac_addr)) {
+		hw_dbg(hw, "MAC address is broadcast\n");
+		status = IXGBE_ERR_INVALID_MAC_ADDR;
+	/* Reject the zero address */
+	} else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 &&
+		   mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) {
+		hw_dbg(hw, "MAC address is all zeros\n");
+		status = IXGBE_ERR_INVALID_MAC_ADDR;
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_set_rar_generic - Set Rx address register
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *  @addr: Address to put into receive address register
+ *  @vmdq: VMDq "set" or "pool" index
+ *  @enable_addr: set flag that address is active
+ *
+ *  Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+			  u32 enable_addr)
+{
+	u32 rar_low, rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (index >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", index);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	/* setup VMDq pool selection before this RAR gets enabled */
+	hw->mac.ops.set_vmdq(hw, index, vmdq);
+
+	/*
+	 * HW expects these in little endian so we reverse the byte
+	 * order from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) |
+		   ((u32)addr[3] << 24));
+	/*
+	 * Some parts put the VMDq setting in the extra RAH bits,
+	 * so save everything except the lower 16 bits that hold part
+	 * of the address and the address valid bit.
+	 */
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
+	rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
+	rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	if (enable_addr != 0)
+		rar_high |= IXGBE_RAH_AV;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low);
+	IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_rar_generic - Remove Rx address register
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *
+ *  Clears an ethernet address from a receive address register.
+ **/
+s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (index >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", index);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	/*
+	 * Some parts put the VMDq setting in the extra RAH bits,
+	 * so save everything except the lower 16 bits that hold part
+	 * of the address and the address valid bit.
+	 */
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
+	rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
+
+	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+
+	/* clear VMDq pool/queue selection for this RAR */
+	hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_rx_addrs_generic - Initializes receive address filters.
+ *  @hw: pointer to hardware structure
+ *
+ *  Places the MAC address in receive address register 0 and clears the rest
+ *  of the receive address registers. Clears the multicast table. Assumes
+ *  the receiver is in reset when the routine is called.
+ **/
+s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/*
+	 * If the current mac address is valid, assume it is a software override
+	 * to the permanent address.
+	 * Otherwise, use the permanent address from the eeprom.
+	 */
+	if (ixgbe_validate_mac_addr(hw->mac.addr) ==
+	    IXGBE_ERR_INVALID_MAC_ADDR) {
+		/* Get the MAC address from the RAR0 for later reference */
+		hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
+
+		hw_dbg(hw, " Keeping Current RAR0 Addr =%.2X %.2X %.2X ",
+			  hw->mac.addr[0], hw->mac.addr[1],
+			  hw->mac.addr[2]);
+		hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3],
+			  hw->mac.addr[4], hw->mac.addr[5]);
+	} else {
+		/* Setup the receive address. */
+		hw_dbg(hw, "Overriding MAC Address in RAR[0]\n");
+		hw_dbg(hw, " New MAC Addr =%.2X %.2X %.2X ",
+			  hw->mac.addr[0], hw->mac.addr[1],
+			  hw->mac.addr[2]);
+		hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3],
+			  hw->mac.addr[4], hw->mac.addr[5]);
+
+		hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
+
+		/* clear VMDq pool/queue selection for RAR 0 */
+		hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
+	}
+	hw->addr_ctrl.overflow_promisc = 0;
+
+	hw->addr_ctrl.rar_used_count = 1;
+
+	/* Zero out the other receive addresses. */
+	hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1);
+	for (i = 1; i < rar_entries; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0);
+	}
+
+	/* Clear the MTA */
+	hw->addr_ctrl.mta_in_use = 0;
+	IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
+
+	hw_dbg(hw, " Clearing MTA\n");
+	for (i = 0; i < hw->mac.mcft_size; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0);
+
+	ixgbe_init_uta_tables(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_add_uc_addr - Adds a secondary unicast address.
+ *  @hw: pointer to hardware structure
+ *  @addr: new address
+ *
+ *  Adds it to unused receive address register or goes into promiscuous mode.
+ **/
+void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
+{
+	u32 rar_entries = hw->mac.num_rar_entries;
+	u32 rar;
+
+	hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n",
+		  addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+	/*
+	 * Place this address in the RAR if there is room,
+	 * else put the controller into promiscuous mode
+	 */
+	if (hw->addr_ctrl.rar_used_count < rar_entries) {
+		rar = hw->addr_ctrl.rar_used_count;
+		hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
+		hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar);
+		hw->addr_ctrl.rar_used_count++;
+	} else {
+		hw->addr_ctrl.overflow_promisc++;
+	}
+
+	hw_dbg(hw, "ixgbe_add_uc_addr Complete\n");
+}
+
+/**
+ *  ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses
+ *  @hw: pointer to hardware structure
+ *  @addr_list: the list of new addresses
+ *  @addr_count: number of addresses
+ *  @next: iterator function to walk the address list
+ *
+ *  The given list replaces any existing list.  Clears the secondary addrs from
+ *  receive address registers.  Uses unused receive address registers for the
+ *  first secondary addresses, and falls back to promiscuous mode as needed.
+ *
+ *  Drivers using secondary unicast addresses must set user_set_promisc when
+ *  manually putting the device into promiscuous mode.
+ **/
+s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
+				      u32 addr_count, ixgbe_mc_addr_itr next)
+{
+	u8 *addr;
+	u32 i;
+	u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc;
+	u32 uc_addr_in_use;
+	u32 fctrl;
+	u32 vmdq;
+
+	/*
+	 * Clear accounting of old secondary address list,
+	 * don't count RAR[0]
+	 */
+	uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1;
+	hw->addr_ctrl.rar_used_count -= uc_addr_in_use;
+	hw->addr_ctrl.overflow_promisc = 0;
+
+	/* Zero out the other receive addresses */
+	hw_dbg(hw, "Clearing RAR[1-%d]\n", uc_addr_in_use+1);
+	for (i = 0; i < uc_addr_in_use; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RAL(1+i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_RAH(1+i), 0);
+	}
+
+	/* Add the new addresses */
+	for (i = 0; i < addr_count; i++) {
+		hw_dbg(hw, " Adding the secondary addresses:\n");
+		addr = next(hw, &addr_list, &vmdq);
+		ixgbe_add_uc_addr(hw, addr, vmdq);
+	}
+
+	if (hw->addr_ctrl.overflow_promisc) {
+		/* enable promisc if not already in overflow or set by user */
+		if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
+			hw_dbg(hw, " Entering address overflow promisc mode\n");
+			fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+			fctrl |= IXGBE_FCTRL_UPE;
+			IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+		}
+	} else {
+		/* only disable if set by overflow, not by user */
+		if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) {
+			hw_dbg(hw, " Leaving address overflow promisc mode\n");
+			fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+			fctrl &= ~IXGBE_FCTRL_UPE;
+			IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+		}
+	}
+
+	hw_dbg(hw, "ixgbe_update_uc_addr_list_generic Complete\n");
+	return 0;
+}
+
+/**
+ *  ixgbe_mta_vector - Determines bit-vector in multicast table to set
+ *  @hw: pointer to hardware structure
+ *  @mc_addr: the multicast address
+ *
+ *  Extracts the 12 bits, from a multicast address, to determine which
+ *  bit-vector to set in the multicast table. The hardware uses 12 bits, from
+ *  incoming rx multicast addresses, to determine the bit-vector to check in
+ *  the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set
+ *  by the MO field of the MCSTCTRL. The MO field is set during initialization
+ *  to mc_filter_type.
+ **/
+static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+	u32 vector = 0;
+
+	switch (hw->mac.mc_filter_type) {
+	case 0:   /* use bits [47:36] of the address */
+		vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+		break;
+	case 1:   /* use bits [46:35] of the address */
+		vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+		break;
+	case 2:   /* use bits [45:34] of the address */
+		vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+		break;
+	case 3:   /* use bits [43:32] of the address */
+		vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+		break;
+	default:  /* Invalid mc_filter_type */
+		hw_dbg(hw, "MC filter type param set incorrectly\n");
+		break;
+	}
+
+	/* vector can only be 12-bits or boundary will be exceeded */
+	vector &= 0xFFF;
+	return vector;
+}
+
+/**
+ *  ixgbe_set_mta - Set bit-vector in multicast table
+ *  @hw: pointer to hardware structure
+ *  @hash_value: Multicast address hash value
+ *
+ *  Sets the bit-vector in the multicast table.
+ **/
+void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+	u32 vector;
+	u32 vector_bit;
+	u32 vector_reg;
+
+	hw->addr_ctrl.mta_in_use++;
+
+	vector = ixgbe_mta_vector(hw, mc_addr);
+	hw_dbg(hw, " bit-vector = 0x%03X\n", vector);
+
+	/*
+	 * The MTA is a register array of 128 32-bit registers. It is treated
+	 * like an array of 4096 bits.  We want to set bit
+	 * BitArray[vector_value]. So we figure out what register the bit is
+	 * in, read it, OR in the new bit, then write back the new value.  The
+	 * register is determined by the upper 7 bits of the vector value and
+	 * the bit within that register are determined by the lower 5 bits of
+	 * the value.
+	 */
+	vector_reg = (vector >> 5) & 0x7F;
+	vector_bit = vector & 0x1F;
+	hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
+}
+
+/**
+ *  ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses
+ *  @hw: pointer to hardware structure
+ *  @mc_addr_list: the list of new multicast addresses
+ *  @mc_addr_count: number of addresses
+ *  @next: iterator function to walk the multicast address list
+ *  @clear: flag, when set clears the table beforehand
+ *
+ *  When the clear flag is set, the given list replaces any existing list.
+ *  Hashes the given addresses into the multicast table.
+ **/
+s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
+				      u32 mc_addr_count, ixgbe_mc_addr_itr next,
+				      bool clear)
+{
+	u32 i;
+	u32 vmdq;
+
+	/*
+	 * Set the new number of MC addresses that we are being requested to
+	 * use.
+	 */
+	hw->addr_ctrl.num_mc_addrs = mc_addr_count;
+	hw->addr_ctrl.mta_in_use = 0;
+
+	/* Clear mta_shadow */
+	if (clear) {
+		hw_dbg(hw, " Clearing MTA\n");
+		memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+	}
+
+	/* Update mta_shadow */
+	for (i = 0; i < mc_addr_count; i++) {
+		hw_dbg(hw, " Adding the multicast addresses:\n");
+		ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq));
+	}
+
+	/* Enable mta */
+	for (i = 0; i < hw->mac.mcft_size; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i,
+				      hw->mac.mta_shadow[i]);
+
+	if (hw->addr_ctrl.mta_in_use > 0)
+		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL,
+				IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type);
+
+	hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n");
+	return 0;
+}
+
+/**
+ *  ixgbe_enable_mc_generic - Enable multicast address in RAR
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
+
+	if (a->mta_in_use > 0)
+		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE |
+				hw->mac.mc_filter_type);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_disable_mc_generic - Disable multicast address in RAR
+ *  @hw: pointer to hardware structure
+ *
+ *  Disables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
+
+	if (a->mta_in_use > 0)
+		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_fc_enable_generic - Enable flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to the current settings.
+ **/
+s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 mflcn_reg, fccfg_reg;
+	u32 reg;
+	u32 fcrtl, fcrth;
+	int i;
+
+	/* Validate the water mark configuration */
+	if (!hw->fc.pause_time) {
+		ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+		goto out;
+	}
+
+	/* Low water mark of zero causes XOFF floods */
+	for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			if (!hw->fc.low_water[i] ||
+			    hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+				hw_dbg(hw, "Invalid water mark configuration\n");
+				ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+				goto out;
+			}
+		}
+	}
+
+	/* Negotiate the fc mode to use */
+	ixgbe_fc_autoneg(hw);
+
+	/* Disable any previous flow control settings */
+	mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+	mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE);
+
+	fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
+	fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY);
+
+	/*
+	 * The possible values of fc.current_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *    we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.current_mode) {
+	case ixgbe_fc_none:
+		/*
+		 * Flow control is disabled by software override or autoneg.
+		 * The code below will actually disable it in the HW.
+		 */
+		break;
+	case ixgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+		mflcn_reg |= IXGBE_MFLCN_RFCE;
+		break;
+	case ixgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
+		break;
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		mflcn_reg |= IXGBE_MFLCN_RFCE;
+		fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
+		break;
+	default:
+		hw_dbg(hw, "Flow control param set incorrectly\n");
+		ret_val = IXGBE_ERR_CONFIG;
+		goto out;
+		break;
+	}
+
+	/* Set 802.3x based flow control settings. */
+	mflcn_reg |= IXGBE_MFLCN_DPF;
+	IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
+	IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
+
+
+	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
+	for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
+			fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+		} else {
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
+			/*
+			 * In order to prevent Tx hangs when the internal Tx
+			 * switch is enabled we must set the high water mark
+			 * to the maximum FCRTH value.  This allows the Tx
+			 * switch to function even under heavy Rx workloads.
+			 */
+			fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32;
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth);
+	}
+
+	/* Configure pause time (2 TCs per register) */
+	reg = hw->fc.pause_time * 0x00010001;
+	for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_negotiate_fc - Negotiate flow control
+ *  @hw: pointer to hardware structure
+ *  @adv_reg: flow control advertised settings
+ *  @lp_reg: link partner's flow control settings
+ *  @adv_sym: symmetric pause bit in advertisement
+ *  @adv_asm: asymmetric pause bit in advertisement
+ *  @lp_sym: symmetric pause bit in link partner advertisement
+ *  @lp_asm: asymmetric pause bit in link partner advertisement
+ *
+ *  Find the intersection between advertised settings and link partner's
+ *  advertised settings
+ **/
+static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+			      u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+{
+	if ((!(adv_reg)) ||  (!(lp_reg)))
+		return IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+	if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) {
+		/*
+		 * Now we need to check if the user selected Rx ONLY
+		 * of pause frames.  In this case, we had to advertise
+		 * FULL flow control because we could not advertise RX
+		 * ONLY. Hence, we must now check to see if we need to
+		 * turn OFF the TRANSMISSION of PAUSE frames.
+		 */
+		if (hw->fc.requested_mode == ixgbe_fc_full) {
+			hw->fc.current_mode = ixgbe_fc_full;
+			hw_dbg(hw, "Flow Control = FULL.\n");
+		} else {
+			hw->fc.current_mode = ixgbe_fc_rx_pause;
+			hw_dbg(hw, "Flow Control=RX PAUSE frames only\n");
+		}
+	} else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+		   (lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+		hw->fc.current_mode = ixgbe_fc_tx_pause;
+		hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n");
+	} else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+		   !(lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+		hw->fc.current_mode = ixgbe_fc_rx_pause;
+		hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n");
+	} else {
+		hw->fc.current_mode = ixgbe_fc_none;
+		hw_dbg(hw, "Flow Control = NONE.\n");
+	}
+	return 0;
+}
+
+/**
+ *  ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according on 1 gig fiber.
+ **/
+static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
+{
+	u32 pcs_anadv_reg, pcs_lpab_reg, linkstat;
+	s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+	/*
+	 * On multispeed fiber at 1g, bail out if
+	 * - link is up but AN did not complete, or if
+	 * - link is up and AN completed but timed out
+	 */
+
+	linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+	if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) ||
+	    (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1))
+		goto out;
+
+	pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+	pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+
+	ret_val =  ixgbe_negotiate_fc(hw, pcs_anadv_reg,
+				      pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE,
+				      IXGBE_PCS1GANA_ASM_PAUSE,
+				      IXGBE_PCS1GANA_SYM_PAUSE,
+				      IXGBE_PCS1GANA_ASM_PAUSE);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to IEEE clause 37.
+ **/
+static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
+{
+	u32 links2, anlp1_reg, autoc_reg, links;
+	s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+	/*
+	 * On backplane, bail out if
+	 * - backplane autoneg was not completed, or if
+	 * - we are 82599 and link partner is not AN enabled
+	 */
+	links = IXGBE_READ_REG(hw, IXGBE_LINKS);
+	if ((links & IXGBE_LINKS_KX_AN_COMP) == 0)
+		goto out;
+
+	if (hw->mac.type == ixgbe_mac_82599EB) {
+		links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2);
+		if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0)
+			goto out;
+	}
+	/*
+	 * Read the 10g AN autoc and LP ability registers and resolve
+	 * local flow control settings accordingly
+	 */
+	autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+
+	ret_val = ixgbe_negotiate_fc(hw, autoc_reg,
+		anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE,
+		IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to IEEE clause 37.
+ **/
+static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
+{
+	u16 technology_ability_reg = 0;
+	u16 lp_technology_ability_reg = 0;
+
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT,
+			     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+			     &technology_ability_reg);
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_LP,
+			     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+			     &lp_technology_ability_reg);
+
+	return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg,
+				  (u32)lp_technology_ability_reg,
+				  IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE,
+				  IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE);
+}
+
+/**
+ *  ixgbe_fc_autoneg - Configure flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Compares our advertised flow control capabilities to those advertised by
+ *  our link partner, and determines the proper flow control mode to use.
+ **/
+void ixgbe_fc_autoneg(struct ixgbe_hw *hw)
+{
+	s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	/*
+	 * AN should have completed when the cable was plugged in.
+	 * Look for reasons to bail out.  Bail out if:
+	 * - FC autoneg is disabled, or if
+	 * - link is not up.
+	 */
+	if (hw->fc.disable_fc_autoneg)
+		goto out;
+
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (!link_up)
+		goto out;
+
+	switch (hw->phy.media_type) {
+	/* Autoneg flow control on fiber adapters */
+	case ixgbe_media_type_fiber:
+		if (speed == IXGBE_LINK_SPEED_1GB_FULL)
+			ret_val = ixgbe_fc_autoneg_fiber(hw);
+		break;
+
+	/* Autoneg flow control on backplane adapters */
+	case ixgbe_media_type_backplane:
+		ret_val = ixgbe_fc_autoneg_backplane(hw);
+		break;
+
+	/* Autoneg flow control on copper adapters */
+	case ixgbe_media_type_copper:
+		if (ixgbe_device_supports_autoneg_fc(hw) == 0)
+			ret_val = ixgbe_fc_autoneg_copper(hw);
+		break;
+
+	default:
+		break;
+	}
+
+out:
+	if (ret_val == 0) {
+		hw->fc.fc_was_autonegged = true;
+	} else {
+		hw->fc.fc_was_autonegged = false;
+		hw->fc.current_mode = hw->fc.requested_mode;
+	}
+}
+
+/**
+ *  ixgbe_disable_pcie_master - Disable PCI-express master access
+ *  @hw: pointer to hardware structure
+ *
+ *  Disables PCI-Express master access and verifies there are no pending
+ *  requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable
+ *  bit hasn't caused the master requests to be disabled, else 0
+ *  is returned signifying master requests disabled.
+ **/
+s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 i;
+
+	/* Always set this bit to ensure any future transactions are blocked */
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS);
+
+	/* Exit if master requets are blocked */
+	if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
+		goto out;
+
+	/* Poll for master request bit to clear */
+	for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+		udelay(100);
+		if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
+			goto out;
+	}
+
+	/*
+	 * Two consecutive resets are required via CTRL.RST per datasheet
+	 * 5.2.5.3.2 Master Disable.  We set a flag to inform the reset routine
+	 * of this need.  The first reset prevents new master requests from
+	 * being issued by our device.  We then must wait 1usec or more for any
+	 * remaining completions from the PCIe bus to trickle in, and then reset
+	 * again to clear out any effects they may have had on our device.
+	 */
+	hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n");
+	hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+
+	/*
+	 * Before proceeding, make sure that the PCIe block does not have
+	 * transactions pending.
+	 */
+	for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) {
+		udelay(100);
+		if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) &
+		    IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING))
+			goto out;
+	}
+
+	hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n");
+	status = IXGBE_ERR_MASTER_REQUESTS_PENDING;
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_acquire_swfw_sync - Acquire SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to acquire
+ *
+ *  Acquires the SWFW semaphore through the GSSR register for the specified
+ *  function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask)
+{
+	u32 gssr;
+	u32 swmask = mask;
+	u32 fwmask = mask << 5;
+	s32 timeout = 200;
+
+	while (timeout) {
+		/*
+		 * SW EEPROM semaphore bit is used for access to all
+		 * SW_FW_SYNC/GSSR bits (not just EEPROM)
+		 */
+		if (ixgbe_get_eeprom_semaphore(hw))
+			return IXGBE_ERR_SWFW_SYNC;
+
+		gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
+		if (!(gssr & (fwmask | swmask)))
+			break;
+
+		/*
+		 * Firmware currently using resource (fwmask) or other software
+		 * thread currently using resource (swmask)
+		 */
+		ixgbe_release_eeprom_semaphore(hw);
+		msleep(5);
+		timeout--;
+	}
+
+	if (!timeout) {
+		hw_dbg(hw, "Driver can't access resource, SW_FW_SYNC timeout.\n");
+		return IXGBE_ERR_SWFW_SYNC;
+	}
+
+	gssr |= swmask;
+	IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
+
+	ixgbe_release_eeprom_semaphore(hw);
+	return 0;
+}
+
+/**
+ *  ixgbe_release_swfw_sync - Release SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to release
+ *
+ *  Releases the SWFW semaphore through the GSSR register for the specified
+ *  function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask)
+{
+	u32 gssr;
+	u32 swmask = mask;
+
+	ixgbe_get_eeprom_semaphore(hw);
+
+	gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
+	gssr &= ~swmask;
+	IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
+
+	ixgbe_release_eeprom_semaphore(hw);
+}
+
+/**
+ *  ixgbe_disable_sec_rx_path_generic - Stops the receive data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Stops the receive data path and waits for the HW to internally empty
+ *  the Rx security block
+ **/
+s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw)
+{
+#define IXGBE_MAX_SECRX_POLL 40
+
+	int i;
+	int secrxreg;
+
+	secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	secrxreg |= IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
+	for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) {
+		secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT);
+		if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY)
+			break;
+		else
+			/* Use interrupt-safe sleep just in case */
+			udelay(1000);
+	}
+
+	/* For informational purposes only */
+	if (i >= IXGBE_MAX_SECRX_POLL)
+		hw_dbg(hw, "Rx unit being enabled before security "
+			 "path fully disabled.  Continuing with init.\n");
+
+	return 0;
+}
+
+/**
+ *  ixgbe_enable_sec_rx_path_generic - Enables the receive data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables the receive data path.
+ **/
+s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw)
+{
+	int secrxreg;
+
+	secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit
+ *  @hw: pointer to hardware structure
+ *  @regval: register value to write to RXCTRL
+ *
+ *  Enables the Rx DMA unit
+ **/
+s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
+{
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_blink_led_start_generic - Blink LED based on index.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to blink
+ **/
+s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
+{
+	ixgbe_link_speed speed = 0;
+	bool link_up = 0;
+	u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	/*
+	 * Link must be up to auto-blink the LEDs;
+	 * Force it if link is down.
+	 */
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+
+	if (!link_up) {
+		autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+		autoc_reg |= IXGBE_AUTOC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+		IXGBE_WRITE_FLUSH(hw);
+		msleep(10);
+	}
+
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg |= IXGBE_LED_BLINK(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_blink_led_stop_generic - Stop blinking LED based on index.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to stop blinking
+ **/
+s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	autoc_reg &= ~IXGBE_AUTOC_FLU;
+	autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg &= ~IXGBE_LED_BLINK(index);
+	led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @san_mac_offset: SAN MAC address offset
+ *
+ *  This function will read the EEPROM location for the SAN MAC address
+ *  pointer, and returns the value at that location.  This is used in both
+ *  get and set mac_addr routines.
+ **/
+static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
+					 u16 *san_mac_offset)
+{
+	/*
+	 * First read the EEPROM pointer to see if the MAC addresses are
+	 * available.
+	 */
+	hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @san_mac_addr: SAN MAC address
+ *
+ *  Reads the SAN MAC address from the EEPROM, if it's available.  This is
+ *  per-port, so set_lan_id() must be called before reading the addresses.
+ *  set_lan_id() is called by identify_sfp(), but this cannot be relied
+ *  upon for non-SFP connections, so we must call it here.
+ **/
+s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+	u16 san_mac_data, san_mac_offset;
+	u8 i;
+
+	/*
+	 * First read the EEPROM pointer to see if the MAC addresses are
+	 * available.  If they're not, no point in calling set_lan_id() here.
+	 */
+	ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+
+	if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
+		/*
+		 * No addresses available in this EEPROM.  It's not an
+		 * error though, so just wipe the local address and return.
+		 */
+		for (i = 0; i < 6; i++)
+			san_mac_addr[i] = 0xFF;
+
+		goto san_mac_addr_out;
+	}
+
+	/* make sure we know which port we need to program */
+	hw->mac.ops.set_lan_id(hw);
+	/* apply the port offset to the address offset */
+	(hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
+			 (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
+	for (i = 0; i < 3; i++) {
+		hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data);
+		san_mac_addr[i * 2] = (u8)(san_mac_data);
+		san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8);
+		san_mac_offset++;
+	}
+
+san_mac_addr_out:
+	return 0;
+}
+
+/**
+ *  ixgbe_set_san_mac_addr_generic - Write the SAN MAC address to the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @san_mac_addr: SAN MAC address
+ *
+ *  Write a SAN MAC address to the EEPROM.
+ **/
+s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+	s32 status = 0;
+	u16 san_mac_data, san_mac_offset;
+	u8 i;
+
+	/* Look for SAN mac address pointer.  If not defined, return */
+	ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+
+	if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) {
+		status = IXGBE_ERR_NO_SAN_ADDR_PTR;
+		goto san_mac_addr_out;
+	}
+
+	/* Make sure we know which port we need to write */
+	hw->mac.ops.set_lan_id(hw);
+	/* Apply the port offset to the address offset */
+	(hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
+			 (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
+
+	for (i = 0; i < 3; i++) {
+		san_mac_data = (u16)((u16)(san_mac_addr[i * 2 + 1]) << 8);
+		san_mac_data |= (u16)(san_mac_addr[i * 2]);
+		hw->eeprom.ops.write(hw, san_mac_offset, san_mac_data);
+		san_mac_offset++;
+	}
+
+san_mac_addr_out:
+	return status;
+}
+
+/**
+ *  ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count
+ *  @hw: pointer to hardware structure
+ *
+ *  Read PCIe configuration space, and get the MSI-X vector count from
+ *  the capabilities table.
+ **/
+u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
+{
+	u16 msix_count = 1;
+	u16 max_msix_count;
+	u16 pcie_offset;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS;
+		max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598;
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS;
+		max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599;
+		break;
+	default:
+		return msix_count;
+	}
+
+	msix_count = IXGBE_READ_PCIE_WORD(hw, pcie_offset);
+	msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK;
+
+	/* MSI-X count is zero-based in HW */
+	msix_count++;
+
+	if (msix_count > max_msix_count)
+		msix_count = max_msix_count;
+
+	return msix_count;
+}
+
+/**
+ *  ixgbe_insert_mac_addr_generic - Find a RAR for this mac address
+ *  @hw: pointer to hardware structure
+ *  @addr: Address to put into receive address register
+ *  @vmdq: VMDq pool to assign
+ *
+ *  Puts an ethernet address into a receive address register, or
+ *  finds the rar that it is aleady in; adds to the pool list
+ **/
+s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
+{
+	static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF;
+	u32 first_empty_rar = NO_EMPTY_RAR_FOUND;
+	u32 rar;
+	u32 rar_low, rar_high;
+	u32 addr_low, addr_high;
+
+	/* swap bytes for HW little endian */
+	addr_low  = addr[0] | (addr[1] << 8)
+			    | (addr[2] << 16)
+			    | (addr[3] << 24);
+	addr_high = addr[4] | (addr[5] << 8);
+
+	/*
+	 * Either find the mac_id in rar or find the first empty space.
+	 * rar_highwater points to just after the highest currently used
+	 * rar in order to shorten the search.  It grows when we add a new
+	 * rar to the top.
+	 */
+	for (rar = 0; rar < hw->mac.rar_highwater; rar++) {
+		rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+
+		if (((IXGBE_RAH_AV & rar_high) == 0)
+		    && first_empty_rar == NO_EMPTY_RAR_FOUND) {
+			first_empty_rar = rar;
+		} else if ((rar_high & 0xFFFF) == addr_high) {
+			rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(rar));
+			if (rar_low == addr_low)
+				break;    /* found it already in the rars */
+		}
+	}
+
+	if (rar < hw->mac.rar_highwater) {
+		/* already there so just add to the pool bits */
+		ixgbe_set_vmdq(hw, rar, vmdq);
+	} else if (first_empty_rar != NO_EMPTY_RAR_FOUND) {
+		/* stick it into first empty RAR slot we found */
+		rar = first_empty_rar;
+		ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
+	} else if (rar == hw->mac.rar_highwater) {
+		/* add it to the top of the list and inc the highwater mark */
+		ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV);
+		hw->mac.rar_highwater++;
+	} else if (rar >= hw->mac.num_rar_entries) {
+		return IXGBE_ERR_INVALID_MAC_ADDR;
+	}
+
+	/*
+	 * If we found rar[0], make sure the default pool bit (we use pool 0)
+	 * remains cleared to be sure default pool packets will get delivered
+	 */
+	if (rar == 0)
+		ixgbe_clear_vmdq(hw, rar, 0);
+
+	return rar;
+}
+
+/**
+ *  ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to disassociate
+ *  @vmdq: VMDq pool index to remove from the rar
+ **/
+s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 mpsar_lo, mpsar_hi;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+	mpsar_hi = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+
+	if (!mpsar_lo && !mpsar_hi)
+		goto done;
+
+	if (vmdq == IXGBE_CLEAR_VMDQ_ALL) {
+		if (mpsar_lo) {
+			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0);
+			mpsar_lo = 0;
+		}
+		if (mpsar_hi) {
+			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0);
+			mpsar_hi = 0;
+		}
+	} else if (vmdq < 32) {
+		mpsar_lo &= ~(1 << vmdq);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo);
+	} else {
+		mpsar_hi &= ~(1 << (vmdq - 32));
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi);
+	}
+
+	/* was that the last pool using this rar? */
+	if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
+		hw->mac.ops.clear_rar(hw, rar);
+done:
+	return 0;
+}
+
+/**
+ *  ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to associate with a VMDq index
+ *  @vmdq: VMDq pool index
+ **/
+s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 mpsar;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	if (vmdq < 32) {
+		mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+		mpsar |= 1 << vmdq;
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
+	} else {
+		mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+		mpsar |= 1 << (vmdq - 32);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
+	}
+	return 0;
+}
+
+/**
+ *  This function should only be involved in the IOV mode.
+ *  In IOV mode, Default pool is next pool after the number of
+ *  VFs advertized and not 0.
+ *  MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
+ *
+ *  ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @vmdq: VMDq pool index
+ **/
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
+{
+	u32 mpsar;
+	u32 rar = hw->mac.san_mac_rar_index;
+
+	if (vmdq < 32) {
+		mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+		mpsar |= 1 << vmdq;
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
+	} else {
+		mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+		mpsar |= 1 << (vmdq - 32);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
+{
+	int i;
+
+	hw_dbg(hw, " Clearing UTA\n");
+
+	for (i = 0; i < 128; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_find_vlvf_slot - find the vlanid or the first empty slot
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *
+ *  return the VLVF index where this VLAN id should be placed
+ *
+ **/
+s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan)
+{
+	u32 bits = 0;
+	u32 first_empty_slot = 0;
+	s32 regindex;
+
+	/* short cut the special case */
+	if (vlan == 0)
+		return 0;
+
+	/*
+	  * Search for the vlan id in the VLVF entries. Save off the first empty
+	  * slot found along the way
+	  */
+	for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) {
+		bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex));
+		if (!bits && !(first_empty_slot))
+			first_empty_slot = regindex;
+		else if ((bits & 0x0FFF) == vlan)
+			break;
+	}
+
+	/*
+	  * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan
+	  * in the VLVF. Else use the first empty VLVF register for this
+	  * vlan id.
+	  */
+	if (regindex >= IXGBE_VLVF_ENTRIES) {
+		if (first_empty_slot)
+			regindex = first_empty_slot;
+		else {
+			hw_dbg(hw, "No space in VLVF.\n");
+			regindex = IXGBE_ERR_NO_SPACE;
+		}
+	}
+
+	return regindex;
+}
+
+/**
+ *  ixgbe_set_vfta_generic - Set VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+			   bool vlan_on)
+{
+	s32 regindex;
+	u32 bitindex;
+	u32 vfta;
+	u32 targetbit;
+	s32 ret_val = 0;
+	bool vfta_changed = false;
+
+	if (vlan > 4095)
+		return IXGBE_ERR_PARAM;
+
+	/*
+	 * this is a 2 part operation - first the VFTA, then the
+	 * VLVF and VLVFB if VT Mode is set
+	 * We don't write the VFTA until we know the VLVF part succeeded.
+	 */
+
+	/* Part 1
+	 * The VFTA is a bitstring made up of 128 32-bit registers
+	 * that enable the particular VLAN id, much like the MTA:
+	 *    bits[11-5]: which register
+	 *    bits[4-0]:  which bit in the register
+	 */
+	regindex = (vlan >> 5) & 0x7F;
+	bitindex = vlan & 0x1F;
+	targetbit = (1 << bitindex);
+	vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
+
+	if (vlan_on) {
+		if (!(vfta & targetbit)) {
+			vfta |= targetbit;
+			vfta_changed = true;
+		}
+	} else {
+		if ((vfta & targetbit)) {
+			vfta &= ~targetbit;
+			vfta_changed = true;
+		}
+	}
+
+	/* Part 2
+	 * Call ixgbe_set_vlvf_generic to set VLVFB and VLVF
+	 */
+	ret_val = ixgbe_set_vlvf_generic(hw, vlan, vind, vlan_on,
+					 &vfta_changed);
+	if (ret_val != 0)
+		return ret_val;
+
+	if (vfta_changed)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_vlvf_generic - Set VLAN Pool Filter
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ *  @vfta_changed: pointer to boolean flag which indicates whether VFTA
+ *                 should be changed
+ *
+ *  Turn on/off specified bit in VLVF table.
+ **/
+s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+			    bool vlan_on, bool *vfta_changed)
+{
+	u32 vt;
+
+	if (vlan > 4095)
+		return IXGBE_ERR_PARAM;
+
+	/* If VT Mode is set
+	 *   Either vlan_on
+	 *     make sure the vlan is in VLVF
+	 *     set the vind bit in the matching VLVFB
+	 *   Or !vlan_on
+	 *     clear the pool bit and possibly the vind
+	 */
+	vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
+	if (vt & IXGBE_VT_CTL_VT_ENABLE) {
+		s32 vlvf_index;
+		u32 bits;
+
+		vlvf_index = ixgbe_find_vlvf_slot(hw, vlan);
+		if (vlvf_index < 0)
+			return vlvf_index;
+
+		if (vlan_on) {
+			/* set the pool bit */
+			if (vind < 32) {
+				bits = IXGBE_READ_REG(hw,
+						IXGBE_VLVFB(vlvf_index * 2));
+				bits |= (1 << vind);
+				IXGBE_WRITE_REG(hw,
+						IXGBE_VLVFB(vlvf_index * 2),
+						bits);
+			} else {
+				bits = IXGBE_READ_REG(hw,
+					IXGBE_VLVFB((vlvf_index * 2) + 1));
+				bits |= (1 << (vind - 32));
+				IXGBE_WRITE_REG(hw,
+					IXGBE_VLVFB((vlvf_index * 2) + 1),
+					bits);
+			}
+		} else {
+			/* clear the pool bit */
+			if (vind < 32) {
+				bits = IXGBE_READ_REG(hw,
+						IXGBE_VLVFB(vlvf_index * 2));
+				bits &= ~(1 << vind);
+				IXGBE_WRITE_REG(hw,
+						IXGBE_VLVFB(vlvf_index * 2),
+						bits);
+				bits |= IXGBE_READ_REG(hw,
+					IXGBE_VLVFB((vlvf_index * 2) + 1));
+			} else {
+				bits = IXGBE_READ_REG(hw,
+					IXGBE_VLVFB((vlvf_index * 2) + 1));
+				bits &= ~(1 << (vind - 32));
+				IXGBE_WRITE_REG(hw,
+					IXGBE_VLVFB((vlvf_index * 2) + 1),
+					bits);
+				bits |= IXGBE_READ_REG(hw,
+						IXGBE_VLVFB(vlvf_index * 2));
+			}
+		}
+
+		/*
+		 * If there are still bits set in the VLVFB registers
+		 * for the VLAN ID indicated we need to see if the
+		 * caller is requesting that we clear the VFTA entry bit.
+		 * If the caller has requested that we clear the VFTA
+		 * entry bit but there are still pools/VFs using this VLAN
+		 * ID entry then ignore the request.  We're not worried
+		 * about the case where we're turning the VFTA VLAN ID
+		 * entry bit on, only when requested to turn it off as
+		 * there may be multiple pools and/or VFs using the
+		 * VLAN ID entry.  In that case we cannot clear the
+		 * VFTA bit until all pools/VFs using that VLAN ID have also
+		 * been cleared.  This will be indicated by "bits" being
+		 * zero.
+		 */
+		if (bits) {
+			IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index),
+					(IXGBE_VLVF_VIEN | vlan));
+			if ((!vlan_on) && (vfta_changed != NULL)) {
+				/* someone wants to clear the vfta entry
+				 * but some pools/VFs are still using it.
+				 * Ignore it. */
+				*vfta_changed = false;
+			}
+		} else
+			IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_vfta_generic - Clear VLAN filter table
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the VLAN filer table, and the VMDq index associated with the filter
+ **/
+s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
+{
+	u32 offset;
+
+	for (offset = 0; offset < hw->mac.vft_size; offset++)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
+
+	for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) {
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_check_mac_link_generic - Determine link and speed status
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @link_up: true when link is up
+ *  @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ *  Reads the links register to determine if link is up and the current speed
+ **/
+s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+				 bool *link_up, bool link_up_wait_to_complete)
+{
+	u32 links_reg, links_orig;
+	u32 i;
+
+	/* clear the old state */
+	links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS);
+
+	links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+
+	if (links_orig != links_reg) {
+		hw_dbg(hw, "LINKS changed from %08X to %08X\n",
+			  links_orig, links_reg);
+	}
+
+	if (link_up_wait_to_complete) {
+		for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+			if (links_reg & IXGBE_LINKS_UP) {
+				*link_up = true;
+				break;
+			} else {
+				*link_up = false;
+			}
+			msleep(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+		}
+	} else {
+		if (links_reg & IXGBE_LINKS_UP)
+			*link_up = true;
+		else
+			*link_up = false;
+	}
+
+	if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
+	    IXGBE_LINKS_SPEED_10G_82599)
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+	else if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
+		 IXGBE_LINKS_SPEED_1G_82599)
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+	else if ((links_reg & IXGBE_LINKS_SPEED_82599) ==
+		 IXGBE_LINKS_SPEED_100_82599)
+		*speed = IXGBE_LINK_SPEED_100_FULL;
+	else
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from
+ *  the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @wwnn_prefix: the alternative WWNN prefix
+ *  @wwpn_prefix: the alternative WWPN prefix
+ *
+ *  This function will read the EEPROM from the alternative SAN MAC address
+ *  block to check the support for the alternative WWNN/WWPN prefix support.
+ **/
+s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+				 u16 *wwpn_prefix)
+{
+	u16 offset, caps;
+	u16 alt_san_mac_blk_offset;
+
+	/* clear output first */
+	*wwnn_prefix = 0xFFFF;
+	*wwpn_prefix = 0xFFFF;
+
+	/* check if alternative SAN MAC is supported */
+	hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR,
+			    &alt_san_mac_blk_offset);
+
+	if ((alt_san_mac_blk_offset == 0) ||
+	    (alt_san_mac_blk_offset == 0xFFFF))
+		goto wwn_prefix_out;
+
+	/* check capability in alternative san mac address block */
+	offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET;
+	hw->eeprom.ops.read(hw, offset, &caps);
+	if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN))
+		goto wwn_prefix_out;
+
+	/* get the corresponding prefix for WWNN/WWPN */
+	offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET;
+	hw->eeprom.ops.read(hw, offset, wwnn_prefix);
+
+	offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET;
+	hw->eeprom.ops.read(hw, offset, wwpn_prefix);
+
+wwn_prefix_out:
+	return 0;
+}
+
+/**
+ *  ixgbe_get_fcoe_boot_status_generic - Get FCOE boot status from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @bs: the fcoe boot status
+ *
+ *  This function will read the FCOE boot status from the iSCSI FCOE block
+ **/
+s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs)
+{
+	u16 offset, caps, flags;
+	s32 status;
+
+	/* clear output first */
+	*bs = ixgbe_fcoe_bootstatus_unavailable;
+
+	/* check if FCOE IBA block is present */
+	offset = IXGBE_FCOE_IBA_CAPS_BLK_PTR;
+	status = hw->eeprom.ops.read(hw, offset, &caps);
+	if (status != 0)
+		goto out;
+
+	if (!(caps & IXGBE_FCOE_IBA_CAPS_FCOE))
+		goto out;
+
+	/* check if iSCSI FCOE block is populated */
+	status = hw->eeprom.ops.read(hw, IXGBE_ISCSI_FCOE_BLK_PTR, &offset);
+	if (status != 0)
+		goto out;
+
+	if ((offset == 0) || (offset == 0xFFFF))
+		goto out;
+
+	/* read fcoe flags in iSCSI FCOE block */
+	offset = offset + IXGBE_ISCSI_FCOE_FLAGS_OFFSET;
+	status = hw->eeprom.ops.read(hw, offset, &flags);
+	if (status != 0)
+		goto out;
+
+	if (flags & IXGBE_ISCSI_FCOE_FLAGS_ENABLE)
+		*bs = ixgbe_fcoe_bootstatus_enabled;
+	else
+		*bs = ixgbe_fcoe_bootstatus_disabled;
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing
+ *  @hw: pointer to hardware structure
+ *  @enable: enable or disable switch for anti-spoofing
+ *  @pf: Physical Function pool - do not enable anti-spoofing for the PF
+ *
+ **/
+void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf)
+{
+	int j;
+	int pf_target_reg = pf >> 3;
+	int pf_target_shift = pf % 8;
+	u32 pfvfspoof = 0;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	if (enable)
+		pfvfspoof = IXGBE_SPOOF_MACAS_MASK;
+
+	/*
+	 * PFVFSPOOF register array is size 8 with 8 bits assigned to
+	 * MAC anti-spoof enables in each register array element.
+	 */
+	for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++)
+		IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof);
+
+	/* If not enabling anti-spoofing then done */
+	if (!enable)
+		return;
+
+	/*
+	 * The PF should be allowed to spoof so that it can support
+	 * emulation mode NICs.  Reset the bit assigned to the PF
+	 */
+	pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg));
+	pfvfspoof ^= (1 << pf_target_shift);
+	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof);
+}
+
+/**
+ *  ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing
+ *  @hw: pointer to hardware structure
+ *  @enable: enable or disable switch for VLAN anti-spoofing
+ *  @pf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing
+ *
+ **/
+void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
+{
+	int vf_target_reg = vf >> 3;
+	int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT;
+	u32 pfvfspoof;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
+	if (enable)
+		pfvfspoof |= (1 << vf_target_shift);
+	else
+		pfvfspoof &= ~(1 << vf_target_shift);
+	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
+}
+
+/**
+ *  ixgbe_get_device_caps_generic - Get additional device capabilities
+ *  @hw: pointer to hardware structure
+ *  @device_caps: the EEPROM word with the extra device capabilities
+ *
+ *  This function will read the EEPROM location for the device capabilities,
+ *  and return the word through device_caps.
+ **/
+s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps)
+{
+	hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_calculate_checksum - Calculate checksum for buffer
+ *  @buffer: pointer to EEPROM
+ *  @length: size of EEPROM to calculate a checksum for
+ *  Calculates the checksum for some buffer on a specified length.  The
+ *  checksum calculated is returned.
+ **/
+static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
+{
+	u32 i;
+	u8 sum = 0;
+
+	if (!buffer)
+		return 0;
+	for (i = 0; i < length; i++)
+		sum += buffer[i];
+
+	return (u8) (0 - sum);
+}
+
+/**
+ *  ixgbe_host_interface_command - Issue command to manageability block
+ *  @hw: pointer to the HW structure
+ *  @buffer: contains the command to write and where the return status will
+ *   be placed
+ *  @length: length of buffer, must be multiple of 4 bytes
+ *
+ *  Communicates with the manageability block.  On success return 0
+ *  else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
+ **/
+static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
+					u32 length)
+{
+	u32 hicr, i, bi;
+	u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
+	u8 buf_len, dword_len;
+
+	s32 ret_val = 0;
+
+	if (length == 0 || length & 0x3 ||
+	    length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+		hw_dbg(hw, "Buffer length failure.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Check that the host interface is enabled. */
+	hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+	if ((hicr & IXGBE_HICR_EN) == 0) {
+		hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Calculate length in DWORDs */
+	dword_len = length >> 2;
+
+	/*
+	 * The device driver writes the relevant command block
+	 * into the ram area.
+	 */
+	for (i = 0; i < dword_len; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+				      i, IXGBE_CPU_TO_LE32(buffer[i]));
+
+	/* Setting this bit tells the ARC that a new command is pending. */
+	IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
+
+	for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) {
+		hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+		if (!(hicr & IXGBE_HICR_C))
+			break;
+		msleep(1);
+	}
+
+	/* Check command successful completion. */
+	if (i == IXGBE_HI_COMMAND_TIMEOUT ||
+	    (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) {
+		hw_dbg(hw, "Command has failed with no status valid.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Calculate length in DWORDs */
+	dword_len = hdr_size >> 2;
+
+	/* first pull in the header so we know the buffer length */
+	for (bi = 0; bi < dword_len; bi++) {
+		buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+		IXGBE_LE32_TO_CPUS(&buffer[bi]);
+	}
+
+	/* If there is any thing in data position pull it in */
+	buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
+	if (buf_len == 0)
+		goto out;
+
+	if (length < (buf_len + hdr_size)) {
+		hw_dbg(hw, "Buffer not large enough for reply message.\n");
+		ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto out;
+	}
+
+	/* Calculate length in DWORDs, add 3 for odd lengths */
+	dword_len = (buf_len + 3) >> 2;
+
+	/* Pull in the rest of the buffer (bi is where we left off)*/
+	for (; bi <= dword_len; bi++) {
+		buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+		IXGBE_LE32_TO_CPUS(&buffer[bi]);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware
+ *  @hw: pointer to the HW structure
+ *  @maj: driver version major number
+ *  @min: driver version minor number
+ *  @build: driver version build number
+ *  @sub: driver version sub build number
+ *
+ *  Sends driver version number to firmware through the manageability
+ *  block.  On success return 0
+ *  else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+				 u8 build, u8 sub)
+{
+	struct ixgbe_hic_drv_info fw_cmd;
+	int i;
+	s32 ret_val = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM)
+	    != 0) {
+		ret_val = IXGBE_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+	fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
+	fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+	fw_cmd.port_num = (u8)hw->bus.func;
+	fw_cmd.ver_maj = maj;
+	fw_cmd.ver_min = min;
+	fw_cmd.ver_build = build;
+	fw_cmd.ver_sub = sub;
+	fw_cmd.hdr.checksum = 0;
+	fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+				(FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+	fw_cmd.pad = 0;
+	fw_cmd.pad2 = 0;
+
+	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+		ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+						       sizeof(fw_cmd));
+		if (ret_val != 0)
+			continue;
+
+		if (fw_cmd.hdr.cmd_or_resp.ret_status ==
+		    FW_CEM_RESP_STATUS_SUCCESS)
+			ret_val = 0;
+		else
+			ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+
+		break;
+	}
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+out:
+	return ret_val;
+}
+
+/**
+ * ixgbe_set_rxpba_generic - Initialize Rx packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
+			     int strategy)
+{
+	u32 pbsize = hw->mac.rx_pb_size;
+	int i = 0;
+	u32 rxpktsize, txpktsize, txpbthresh;
+
+	/* Reserve headroom */
+	pbsize -= headroom;
+
+	if (!num_pb)
+		num_pb = 1;
+
+	/* Divide remaining packet buffer space amongst the number of packet
+	 * buffers requested using supplied strategy.
+	 */
+	switch (strategy) {
+	case PBA_STRATEGY_WEIGHTED:
+		/* ixgbe_dcb_pba_80_48 strategy weight first half of packet
+		 * buffer with 5/8 of the packet buffer space.
+		 */
+		rxpktsize = (pbsize * 5) / (num_pb * 4);
+		pbsize -= rxpktsize * (num_pb / 2);
+		rxpktsize <<= IXGBE_RXPBSIZE_SHIFT;
+		for (; i < (num_pb / 2); i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		/* Fall through to configure remaining packet buffers */
+	case PBA_STRATEGY_EQUAL:
+		rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT;
+		for (; i < num_pb; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		break;
+	default:
+		break;
+	}
+
+	/* Only support an equally distributed Tx packet buffer strategy. */
+	txpktsize = IXGBE_TXPBSIZE_MAX / num_pb;
+	txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX;
+	for (i = 0; i < num_pb; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
+	}
+
+	/* Clear unused TCs, if any, to zero buffer size*/
+	for (; i < IXGBE_MAX_PB; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
+	}
+}
+
+/**
+ * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo
+ * @hw: pointer to the hardware structure
+ *
+ * The 82599 and x540 MACs can experience issues if TX work is still pending
+ * when a reset occurs.  This function prevents this by flushing the PCIe
+ * buffers on the system.
+ **/
+void ixgbe_clear_tx_pending(struct ixgbe_hw *hw)
+{
+	u32 gcr_ext, hlreg0;
+
+	/*
+	 * If double reset is not requested then all transactions should
+	 * already be clear and as such there is no work to do
+	 */
+	if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED))
+		return;
+
+	/*
+	 * Set loopback enable to prevent any transmits from being sent
+	 * should the link come up.  This assumes that the RXCTRL.RXEN bit
+	 * has already been cleared.
+	 */
+	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK);
+
+	/* initiate cleaning flow for buffers in the PCIe transaction layer */
+	gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT,
+			gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR);
+
+	/* Flush all writes and allow 20usec for all transactions to clear */
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(20);
+
+	/* restore previous register values */
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+}
+
+static const u8 ixgbe_emc_temp_data[4] = {
+	IXGBE_EMC_INTERNAL_DATA,
+	IXGBE_EMC_DIODE1_DATA,
+	IXGBE_EMC_DIODE2_DATA,
+	IXGBE_EMC_DIODE3_DATA
+};
+static const u8 ixgbe_emc_therm_limit[4] = {
+	IXGBE_EMC_INTERNAL_THERM_LIMIT,
+	IXGBE_EMC_DIODE1_THERM_LIMIT,
+	IXGBE_EMC_DIODE2_THERM_LIMIT,
+	IXGBE_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ *  ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
+ *  @hw: pointer to hardware structure
+ *  @data: pointer to the thermal sensor data structure
+ *
+ *  Returns the thermal sensor data structure
+ **/
+s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  num_sensors;
+	u8  sensor_index;
+	u8  sensor_location;
+	u8  i;
+	struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	/* Only support thermal sensors attached to 82599 physical port 0 */
+	if ((hw->mac.type != ixgbe_mac_82599EB) ||
+	    (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) {
+		status = IXGBE_NOT_IMPLEMENTED;
+		goto out;
+	}
+
+	status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset);
+	if (status)
+		goto out;
+
+	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) {
+		status = IXGBE_NOT_IMPLEMENTED;
+		goto out;
+	}
+
+	status = hw->eeprom.ops.read(hw, ets_offset, &ets_cfg);
+	if (status)
+		goto out;
+
+	if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT)
+		!= IXGBE_ETS_TYPE_EMC) {
+		status = IXGBE_NOT_IMPLEMENTED;
+		goto out;
+	}
+
+	num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+	if (num_sensors > IXGBE_MAX_SENSORS)
+		num_sensors = IXGBE_MAX_SENSORS;
+
+	for (i = 0; i < num_sensors; i++) {
+		status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i),
+					     &ets_sensor);
+		if (status)
+			goto out;
+
+		sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+				IXGBE_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+				   IXGBE_ETS_DATA_LOC_SHIFT);
+
+		if (sensor_location != 0) {
+			status = hw->phy.ops.read_i2c_byte(hw,
+					ixgbe_emc_temp_data[sensor_index],
+					IXGBE_I2C_THERMAL_SENSOR_ADDR,
+					&data->sensor[i].temp);
+			if (status)
+				goto out;
+		}
+	}
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds
+ *  @hw: pointer to hardware structure
+ *
+ *  Inits the thermal sensor thresholds according to the NVM map
+ *  and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  low_thresh_delta;
+	u8  num_sensors;
+	u8  sensor_index;
+	u8  sensor_location;
+	u8  therm_limit;
+	u8  i;
+	struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data));
+
+	/* Only support thermal sensors attached to 82599 physical port 0 */
+	if ((hw->mac.type != ixgbe_mac_82599EB) ||
+	    (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1))
+		return IXGBE_NOT_IMPLEMENTED;
+
+	hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset);
+	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+		return IXGBE_NOT_IMPLEMENTED;
+
+	hw->eeprom.ops.read(hw, ets_offset, &ets_cfg);
+	if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT)
+		!= IXGBE_ETS_TYPE_EMC)
+		return IXGBE_NOT_IMPLEMENTED;
+
+	low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >>
+			     IXGBE_ETS_LTHRES_DELTA_SHIFT);
+	num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+
+	for (i = 0; i < num_sensors; i++) {
+		hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor);
+		sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+				IXGBE_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+				   IXGBE_ETS_DATA_LOC_SHIFT);
+		therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK;
+
+		hw->phy.ops.write_i2c_byte(hw,
+			ixgbe_emc_therm_limit[sensor_index],
+			IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit);
+
+		if ((i < IXGBE_MAX_SENSORS) && (sensor_location != 0)) {
+			data->sensor[i].location = sensor_location;
+			data->sensor[i].caution_thresh = therm_limit;
+			data->sensor[i].max_op_thresh = therm_limit -
+							low_thresh_delta;
+		}
+	}
+	return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
new file mode 100644
index 00000000..9bd6f534
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
@@ -0,0 +1,140 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_COMMON_H_
+#define _IXGBE_COMMON_H_
+
+#include "ixgbe_type.h"
+
+u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw);
+s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
+s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
+s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+				  u32 pba_num_size);
+s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
+s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
+void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw);
+s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+
+s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
+s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					       u16 words, u16 *data);
+s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				   u16 words, u16 *data);
+s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				    u16 words, u16 *data);
+s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+				       u16 *data);
+s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data);
+u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
+s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+					   u16 *checksum_val);
+s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
+s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg);
+
+s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+			  u32 enable_addr);
+s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
+s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
+				      u32 mc_addr_count,
+				      ixgbe_mc_addr_itr func, bool clear);
+s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
+				      u32 addr_count, ixgbe_mc_addr_itr func);
+s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
+s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
+s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw);
+s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
+void ixgbe_fc_autoneg(struct ixgbe_hw *hw);
+
+s32 ixgbe_validate_mac_addr(u8 *mac_addr);
+s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask);
+s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw);
+
+s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
+
+s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
+
+s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
+s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
+s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
+s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
+			 u32 vind, bool vlan_on);
+s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+			   bool vlan_on, bool *vfta_changed);
+s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
+s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan);
+
+s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
+			       ixgbe_link_speed *speed,
+			       bool *link_up, bool link_up_wait_to_complete);
+
+s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+				 u16 *wwpn_prefix);
+
+s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs);
+void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf);
+void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
+s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
+void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom,
+			     int strategy);
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+				 u8 build, u8 ver);
+void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
+
+#define IXGBE_I2C_THERMAL_SENSOR_ADDR	0xF8
+#define IXGBE_EMC_INTERNAL_DATA		0x00
+#define IXGBE_EMC_INTERNAL_THERM_LIMIT	0x20
+#define IXGBE_EMC_DIODE1_DATA		0x01
+#define IXGBE_EMC_DIODE1_THERM_LIMIT	0x19
+#define IXGBE_EMC_DIODE2_DATA		0x23
+#define IXGBE_EMC_DIODE2_THERM_LIMIT	0x1A
+#define IXGBE_EMC_DIODE3_DATA		0x2A
+#define IXGBE_EMC_DIODE3_THERM_LIMIT	0x30
+
+s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw);
+s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw);
+#endif /* IXGBE_COMMON */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
new file mode 100644
index 00000000..a6690451
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
@@ -0,0 +1,168 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_DCB_H_
+#define _IXGBE_DCB_H_
+
+
+#include "ixgbe_type.h"
+
+/* DCB defines */
+/* DCB credit calculation defines */
+#define IXGBE_DCB_CREDIT_QUANTUM	64
+#define IXGBE_DCB_MAX_CREDIT_REFILL	200   /* 200 * 64B = 12800B */
+#define IXGBE_DCB_MAX_TSO_SIZE		(32 * 1024) /* Max TSO pkt size in DCB*/
+#define IXGBE_DCB_MAX_CREDIT		(2 * IXGBE_DCB_MAX_CREDIT_REFILL)
+
+/* 513 for 32KB TSO packet */
+#define IXGBE_DCB_MIN_TSO_CREDIT	\
+	((IXGBE_DCB_MAX_TSO_SIZE / IXGBE_DCB_CREDIT_QUANTUM) + 1)
+
+/* DCB configuration defines */
+#define IXGBE_DCB_MAX_USER_PRIORITY	8
+#define IXGBE_DCB_MAX_BW_GROUP		8
+#define IXGBE_DCB_BW_PERCENT		100
+
+#define IXGBE_DCB_TX_CONFIG		0
+#define IXGBE_DCB_RX_CONFIG		1
+
+/* DCB capability defines */
+#define IXGBE_DCB_PG_SUPPORT	0x00000001
+#define IXGBE_DCB_PFC_SUPPORT	0x00000002
+#define IXGBE_DCB_BCN_SUPPORT	0x00000004
+#define IXGBE_DCB_UP2TC_SUPPORT	0x00000008
+#define IXGBE_DCB_GSP_SUPPORT	0x00000010
+
+struct ixgbe_dcb_support {
+	u32 capabilities; /* DCB capabilities */
+
+	/* Each bit represents a number of TCs configurable in the hw.
+	 * If 8 traffic classes can be configured, the value is 0x80. */
+	u8 traffic_classes;
+	u8 pfc_traffic_classes;
+};
+
+enum ixgbe_dcb_tsa {
+	ixgbe_dcb_tsa_ets = 0,
+	ixgbe_dcb_tsa_group_strict_cee,
+	ixgbe_dcb_tsa_strict
+};
+
+/* Traffic class bandwidth allocation per direction */
+struct ixgbe_dcb_tc_path {
+	u8 bwg_id; /* Bandwidth Group (BWG) ID */
+	u8 bwg_percent; /* % of BWG's bandwidth */
+	u8 link_percent; /* % of link bandwidth */
+	u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */
+	u16 data_credits_refill; /* Credit refill amount in 64B granularity */
+	u16 data_credits_max; /* Max credits for a configured packet buffer
+			       * in 64B granularity.*/
+	enum ixgbe_dcb_tsa tsa; /* Link or Group Strict Priority */
+};
+
+enum ixgbe_dcb_pfc {
+	ixgbe_dcb_pfc_disabled = 0,
+	ixgbe_dcb_pfc_enabled,
+	ixgbe_dcb_pfc_enabled_txonly,
+	ixgbe_dcb_pfc_enabled_rxonly
+};
+
+/* Traffic class configuration */
+struct ixgbe_dcb_tc_config {
+	struct ixgbe_dcb_tc_path path[2]; /* One each for Tx/Rx */
+	enum ixgbe_dcb_pfc pfc; /* Class based flow control setting */
+
+	u16 desc_credits_max; /* For Tx Descriptor arbitration */
+	u8 tc; /* Traffic class (TC) */
+};
+
+enum ixgbe_dcb_pba {
+	/* PBA[0-7] each use 64KB FIFO */
+	ixgbe_dcb_pba_equal = PBA_STRATEGY_EQUAL,
+	/* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
+	ixgbe_dcb_pba_80_48 = PBA_STRATEGY_WEIGHTED
+};
+
+struct ixgbe_dcb_num_tcs {
+	u8 pg_tcs;
+	u8 pfc_tcs;
+};
+
+struct ixgbe_dcb_config {
+	struct ixgbe_dcb_tc_config tc_config[IXGBE_DCB_MAX_TRAFFIC_CLASS];
+	struct ixgbe_dcb_support support;
+	struct ixgbe_dcb_num_tcs num_tcs;
+	u8 bw_percentage[2][IXGBE_DCB_MAX_BW_GROUP]; /* One each for Tx/Rx */
+	bool pfc_mode_enable;
+	bool round_robin_enable;
+
+	enum ixgbe_dcb_pba rx_pba_cfg;
+
+	u32 dcb_cfg_version; /* Not used...OS-specific? */
+	u32 link_speed; /* For bandwidth allocation validation purpose */
+	bool vt_mode;
+};
+
+/* DCB driver APIs */
+
+/* DCB rule checking */
+s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *);
+
+/* DCB credits calculation */
+s32 ixgbe_dcb_calculate_tc_credits(u8 *, u16 *, u16 *, int);
+s32 ixgbe_dcb_calculate_tc_credits_cee(struct ixgbe_hw *,
+				       struct ixgbe_dcb_config *, u32, u8);
+
+/* DCB PFC */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, u8, u8 *);
+s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB stats */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *,
+					 struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *,
+					 struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *,
+				    struct ixgbe_dcb_config *);
+
+/* DCB unpack routines */
+void ixgbe_dcb_unpack_pfc_cee(struct ixgbe_dcb_config *, u8 *, u8 *);
+void ixgbe_dcb_unpack_refill_cee(struct ixgbe_dcb_config *, int, u16 *);
+void ixgbe_dcb_unpack_max_cee(struct ixgbe_dcb_config *, u16 *);
+void ixgbe_dcb_unpack_bwgid_cee(struct ixgbe_dcb_config *, int, u8 *);
+void ixgbe_dcb_unpack_tsa_cee(struct ixgbe_dcb_config *, int, u8 *);
+void ixgbe_dcb_unpack_map_cee(struct ixgbe_dcb_config *, int, u8 *);
+
+/* DCB initialization */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, u16 *, u16 *, u8 *, u8 *, u8 *);
+s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+#endif /* _IXGBE_DCB_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
new file mode 100644
index 00000000..11472bd3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
@@ -0,0 +1,2901 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/* ethtool support for ixgbe */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#ifdef SIOCETHTOOL
+#include <asm/uaccess.h>
+
+#include "ixgbe.h"
+
+#ifndef ETH_GSTRING_LEN
+#define ETH_GSTRING_LEN 32
+#endif
+
+#define IXGBE_ALL_RAR_ENTRIES 16
+
+#ifdef ETHTOOL_OPS_COMPAT
+#include "kcompat_ethtool.c"
+#endif
+#ifdef ETHTOOL_GSTATS
+struct ixgbe_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IXGBE_NETDEV_STAT(_net_stat) { \
+	.stat_string = #_net_stat, \
+	.sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
+	.stat_offset = offsetof(struct net_device_stats, _net_stat) \
+}
+static const struct ixgbe_stats ixgbe_gstrings_net_stats[] = {
+	IXGBE_NETDEV_STAT(rx_packets),
+	IXGBE_NETDEV_STAT(tx_packets),
+	IXGBE_NETDEV_STAT(rx_bytes),
+	IXGBE_NETDEV_STAT(tx_bytes),
+	IXGBE_NETDEV_STAT(rx_errors),
+	IXGBE_NETDEV_STAT(tx_errors),
+	IXGBE_NETDEV_STAT(rx_dropped),
+	IXGBE_NETDEV_STAT(tx_dropped),
+	IXGBE_NETDEV_STAT(multicast),
+	IXGBE_NETDEV_STAT(collisions),
+	IXGBE_NETDEV_STAT(rx_over_errors),
+	IXGBE_NETDEV_STAT(rx_crc_errors),
+	IXGBE_NETDEV_STAT(rx_frame_errors),
+	IXGBE_NETDEV_STAT(rx_fifo_errors),
+	IXGBE_NETDEV_STAT(rx_missed_errors),
+	IXGBE_NETDEV_STAT(tx_aborted_errors),
+	IXGBE_NETDEV_STAT(tx_carrier_errors),
+	IXGBE_NETDEV_STAT(tx_fifo_errors),
+	IXGBE_NETDEV_STAT(tx_heartbeat_errors),
+};
+
+#define IXGBE_STAT(_name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(struct ixgbe_adapter, _stat), \
+	.stat_offset = offsetof(struct ixgbe_adapter, _stat) \
+}
+static struct ixgbe_stats ixgbe_gstrings_stats[] = {
+	IXGBE_STAT("rx_pkts_nic", stats.gprc),
+	IXGBE_STAT("tx_pkts_nic", stats.gptc),
+	IXGBE_STAT("rx_bytes_nic", stats.gorc),
+	IXGBE_STAT("tx_bytes_nic", stats.gotc),
+	IXGBE_STAT("lsc_int", lsc_int),
+	IXGBE_STAT("tx_busy", tx_busy),
+	IXGBE_STAT("non_eop_descs", non_eop_descs),
+#ifndef CONFIG_IXGBE_NAPI
+	IXGBE_STAT("rx_dropped_backlog", rx_dropped_backlog),
+#endif
+	IXGBE_STAT("broadcast", stats.bprc),
+	IXGBE_STAT("rx_no_buffer_count", stats.rnbc[0]) ,
+	IXGBE_STAT("tx_timeout_count", tx_timeout_count),
+	IXGBE_STAT("tx_restart_queue", restart_queue),
+	IXGBE_STAT("rx_long_length_errors", stats.roc),
+	IXGBE_STAT("rx_short_length_errors", stats.ruc),
+	IXGBE_STAT("tx_flow_control_xon", stats.lxontxc),
+	IXGBE_STAT("rx_flow_control_xon", stats.lxonrxc),
+	IXGBE_STAT("tx_flow_control_xoff", stats.lxofftxc),
+	IXGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc),
+	IXGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error),
+	IXGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
+	IXGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+#ifndef IXGBE_NO_LRO
+	IXGBE_STAT("lro_aggregated", lro_stats.coal),
+	IXGBE_STAT("lro_flushed", lro_stats.flushed),
+#endif /* IXGBE_NO_LRO */
+	IXGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources),
+	IXGBE_STAT("hw_rsc_aggregated", rsc_total_count),
+	IXGBE_STAT("hw_rsc_flushed", rsc_total_flush),
+#ifdef HAVE_TX_MQ
+	IXGBE_STAT("fdir_match", stats.fdirmatch),
+	IXGBE_STAT("fdir_miss", stats.fdirmiss),
+	IXGBE_STAT("fdir_overflow", fdir_overflow),
+#endif /* HAVE_TX_MQ */
+#ifdef IXGBE_FCOE
+	IXGBE_STAT("fcoe_bad_fccrc", stats.fccrc),
+	IXGBE_STAT("fcoe_last_errors", stats.fclast),
+	IXGBE_STAT("rx_fcoe_dropped", stats.fcoerpdc),
+	IXGBE_STAT("rx_fcoe_packets", stats.fcoeprc),
+	IXGBE_STAT("rx_fcoe_dwords", stats.fcoedwrc),
+	IXGBE_STAT("fcoe_noddp", stats.fcoe_noddp),
+	IXGBE_STAT("fcoe_noddp_ext_buff", stats.fcoe_noddp_ext_buff),
+	IXGBE_STAT("tx_fcoe_packets", stats.fcoeptc),
+	IXGBE_STAT("tx_fcoe_dwords", stats.fcoedwtc),
+#endif /* IXGBE_FCOE */
+	IXGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+	IXGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+	IXGBE_STAT("os2bmc_tx_by_host", stats.o2bspc),
+	IXGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+};
+
+#define IXGBE_QUEUE_STATS_LEN \
+	((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \
+	 ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \
+	  (sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
+#define IXGBE_GLOBAL_STATS_LEN	ARRAY_SIZE(ixgbe_gstrings_stats)
+#define IXGBE_NETDEV_STATS_LEN	ARRAY_SIZE(ixgbe_gstrings_net_stats)
+#define IXGBE_PB_STATS_LEN ( \
+		(((struct ixgbe_adapter *)netdev_priv(netdev))->flags & \
+		 IXGBE_FLAG_DCB_ENABLED) ? \
+		 (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
+		  sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
+		  sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
+		  sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
+		 / sizeof(u64) : 0)
+#define IXGBE_VF_STATS_LEN \
+	((((struct ixgbe_adapter *)netdev_priv(netdev))->num_vfs) * \
+	  (sizeof(struct vf_stats) / sizeof(u64)))
+#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
+			 IXGBE_NETDEV_STATS_LEN + \
+			 IXGBE_PB_STATS_LEN + \
+			 IXGBE_QUEUE_STATS_LEN + \
+			 IXGBE_VF_STATS_LEN)
+
+#endif /* ETHTOOL_GSTATS */
+#ifdef ETHTOOL_TEST
+static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)", "Eeprom test    (offline)",
+	"Interrupt test (offline)", "Loopback test  (offline)",
+	"Link test   (on/offline)"
+};
+#define IXGBE_TEST_LEN	(sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+#endif /* ETHTOOL_TEST */
+
+int ixgbe_get_settings(struct net_device *netdev,
+		       struct ethtool_cmd *ecmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 link_speed = 0;
+	bool link_up;
+
+	ecmd->supported = SUPPORTED_10000baseT_Full;
+	ecmd->autoneg = AUTONEG_ENABLE;
+	ecmd->transceiver = XCVR_EXTERNAL;
+	if ((hw->phy.media_type == ixgbe_media_type_copper) ||
+	    (hw->phy.multispeed_fiber)) {
+		ecmd->supported |= (SUPPORTED_1000baseT_Full |
+				    SUPPORTED_Autoneg);
+		switch (hw->mac.type) {
+		case ixgbe_mac_X540:
+			ecmd->supported |= SUPPORTED_100baseT_Full;
+			break;
+		default:
+			break;
+		}
+
+		ecmd->advertising = ADVERTISED_Autoneg;
+		if (hw->phy.autoneg_advertised) {
+			if (hw->phy.autoneg_advertised &
+			    IXGBE_LINK_SPEED_100_FULL)
+				ecmd->advertising |= ADVERTISED_100baseT_Full;
+			if (hw->phy.autoneg_advertised &
+			    IXGBE_LINK_SPEED_10GB_FULL)
+				ecmd->advertising |= ADVERTISED_10000baseT_Full;
+			if (hw->phy.autoneg_advertised &
+			    IXGBE_LINK_SPEED_1GB_FULL)
+				ecmd->advertising |= ADVERTISED_1000baseT_Full;
+		} else {
+			/*
+			 * Default advertised modes in case
+			 * phy.autoneg_advertised isn't set.
+			 */
+			ecmd->advertising |= (ADVERTISED_10000baseT_Full |
+					      ADVERTISED_1000baseT_Full);
+			if (hw->mac.type == ixgbe_mac_X540)
+				ecmd->advertising |= ADVERTISED_100baseT_Full;
+		}
+
+		if (hw->phy.media_type == ixgbe_media_type_copper) {
+			ecmd->supported |= SUPPORTED_TP;
+			ecmd->advertising |= ADVERTISED_TP;
+			ecmd->port = PORT_TP;
+		} else {
+			ecmd->supported |= SUPPORTED_FIBRE;
+			ecmd->advertising |= ADVERTISED_FIBRE;
+			ecmd->port = PORT_FIBRE;
+		}
+	} else if (hw->phy.media_type == ixgbe_media_type_backplane) {
+		/* Set as FIBRE until SERDES defined in kernel */
+		if (hw->device_id == IXGBE_DEV_ID_82598_BX) {
+			ecmd->supported = (SUPPORTED_1000baseT_Full |
+					   SUPPORTED_FIBRE);
+			ecmd->advertising = (ADVERTISED_1000baseT_Full |
+					     ADVERTISED_FIBRE);
+			ecmd->port = PORT_FIBRE;
+			ecmd->autoneg = AUTONEG_DISABLE;
+		} else if ((hw->device_id == IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
+			  || (hw->device_id == IXGBE_DEV_ID_82599_KX4_MEZZ)) {
+			ecmd->supported |= (SUPPORTED_1000baseT_Full |
+					    SUPPORTED_Autoneg |
+					    SUPPORTED_FIBRE);
+			ecmd->advertising = (ADVERTISED_10000baseT_Full |
+					     ADVERTISED_1000baseT_Full |
+					     ADVERTISED_Autoneg |
+					     ADVERTISED_FIBRE);
+			ecmd->port = PORT_FIBRE;
+		} else {
+			ecmd->supported |= (SUPPORTED_1000baseT_Full |
+					    SUPPORTED_FIBRE);
+			ecmd->advertising = (ADVERTISED_10000baseT_Full |
+					     ADVERTISED_1000baseT_Full |
+					     ADVERTISED_FIBRE);
+			ecmd->port = PORT_FIBRE;
+		}
+	} else {
+		ecmd->supported |= SUPPORTED_FIBRE;
+		ecmd->advertising = (ADVERTISED_10000baseT_Full |
+				     ADVERTISED_FIBRE);
+		ecmd->port = PORT_FIBRE;
+		ecmd->autoneg = AUTONEG_DISABLE;
+	}
+
+#ifdef HAVE_ETHTOOL_SFP_DISPLAY_PORT
+	/* Get PHY type */
+	switch (adapter->hw.phy.type) {
+	case ixgbe_phy_tn:
+	case ixgbe_phy_aq:
+	case ixgbe_phy_cu_unknown:
+		/* Copper 10G-BASET */
+		ecmd->port = PORT_TP;
+		break;
+	case ixgbe_phy_qt:
+		ecmd->port = PORT_FIBRE;
+		break;
+	case ixgbe_phy_nl:
+	case ixgbe_phy_sfp_passive_tyco:
+	case ixgbe_phy_sfp_passive_unknown:
+	case ixgbe_phy_sfp_ftl:
+	case ixgbe_phy_sfp_avago:
+	case ixgbe_phy_sfp_intel:
+	case ixgbe_phy_sfp_unknown:
+		switch (adapter->hw.phy.sfp_type) {
+		/* SFP+ devices, further checking needed */
+		case ixgbe_sfp_type_da_cu:
+		case ixgbe_sfp_type_da_cu_core0:
+		case ixgbe_sfp_type_da_cu_core1:
+			ecmd->port = PORT_DA;
+			break;
+		case ixgbe_sfp_type_sr:
+		case ixgbe_sfp_type_lr:
+		case ixgbe_sfp_type_srlr_core0:
+		case ixgbe_sfp_type_srlr_core1:
+			ecmd->port = PORT_FIBRE;
+			break;
+		case ixgbe_sfp_type_not_present:
+			ecmd->port = PORT_NONE;
+			break;
+		case ixgbe_sfp_type_1g_cu_core0:
+		case ixgbe_sfp_type_1g_cu_core1:
+			ecmd->port = PORT_TP;
+			ecmd->supported = SUPPORTED_TP;
+			ecmd->advertising = (ADVERTISED_1000baseT_Full |
+				ADVERTISED_TP);
+			break;
+		case ixgbe_sfp_type_1g_sx_core0:
+		case ixgbe_sfp_type_1g_sx_core1:
+			ecmd->port = PORT_FIBRE;
+			ecmd->supported = SUPPORTED_FIBRE;
+			ecmd->advertising = (ADVERTISED_1000baseT_Full |
+				ADVERTISED_FIBRE);
+			break;
+		case ixgbe_sfp_type_unknown:
+		default:
+			ecmd->port = PORT_OTHER;
+			break;
+		}
+		break;
+	case ixgbe_phy_xaui:
+		ecmd->port = PORT_NONE;
+		break;
+	case ixgbe_phy_unknown:
+	case ixgbe_phy_generic:
+	case ixgbe_phy_sfp_unsupported:
+	default:
+		ecmd->port = PORT_OTHER;
+		break;
+	}
+#endif
+
+	if (!in_interrupt()) {
+		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+	} else {
+		/*
+		 * this case is a special workaround for RHEL5 bonding
+		 * that calls this routine from interrupt context
+		 */
+		link_speed = adapter->link_speed;
+		link_up = adapter->link_up;
+	}
+
+	if (link_up) {
+		switch (link_speed) {
+		case IXGBE_LINK_SPEED_10GB_FULL:
+			ecmd->speed = SPEED_10000;
+			break;
+		case IXGBE_LINK_SPEED_1GB_FULL:
+			ecmd->speed = SPEED_1000;
+			break;
+		case IXGBE_LINK_SPEED_100_FULL:
+			ecmd->speed = SPEED_100;
+			break;
+		default:
+			break;
+		}
+		ecmd->duplex = DUPLEX_FULL;
+	} else {
+		ecmd->speed = -1;
+		ecmd->duplex = -1;
+	}
+
+	return 0;
+}
+
+static int ixgbe_set_settings(struct net_device *netdev,
+			      struct ethtool_cmd *ecmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 advertised, old;
+	s32 err = 0;
+
+	if ((hw->phy.media_type == ixgbe_media_type_copper) ||
+	    (hw->phy.multispeed_fiber)) {
+		/*
+		 * this function does not support duplex forcing, but can
+		 * limit the advertising of the adapter to the specified speed
+		 */
+		if (ecmd->autoneg == AUTONEG_DISABLE)
+			return -EINVAL;
+
+		if (ecmd->advertising & ~ecmd->supported)
+			return -EINVAL;
+
+		old = hw->phy.autoneg_advertised;
+		advertised = 0;
+		if (ecmd->advertising & ADVERTISED_10000baseT_Full)
+			advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+		if (ecmd->advertising & ADVERTISED_1000baseT_Full)
+			advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+		if (ecmd->advertising & ADVERTISED_100baseT_Full)
+			advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+		if (old == advertised)
+			return err;
+		/* this sets the link speed and restarts auto-neg */
+		hw->mac.autotry_restart = true;
+		err = hw->mac.ops.setup_link(hw, advertised, true, true);
+		if (err) {
+			e_info(probe, "setup link failed with code %d\n", err);
+			hw->mac.ops.setup_link(hw, old, true, true);
+		}
+	}
+	return err;
+}
+
+static void ixgbe_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (hw->fc.disable_fc_autoneg)
+		pause->autoneg = 0;
+	else
+		pause->autoneg = 1;
+
+	if (hw->fc.current_mode == ixgbe_fc_rx_pause) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == ixgbe_fc_tx_pause) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == ixgbe_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int ixgbe_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_fc_info fc = hw->fc;
+
+	/* 82598 does no support link flow control with DCB enabled */
+	if ((hw->mac.type == ixgbe_mac_82598EB) &&
+	    (adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+		return -EINVAL;
+
+	fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE);
+
+	if ((pause->rx_pause && pause->tx_pause) || pause->autoneg)
+		fc.requested_mode = ixgbe_fc_full;
+	else if (pause->rx_pause)
+		fc.requested_mode = ixgbe_fc_rx_pause;
+	else if (pause->tx_pause)
+		fc.requested_mode = ixgbe_fc_tx_pause;
+	else
+		fc.requested_mode = ixgbe_fc_none;
+
+	/* if the thing changed then we'll update and use new autoneg */
+	if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) {
+		hw->fc = fc;
+		if (netif_running(netdev))
+			ixgbe_reinit_locked(adapter);
+		else
+			ixgbe_reset(adapter);
+	}
+
+	return 0;
+}
+
+static u32 ixgbe_get_msglevel(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	return adapter->msg_enable;
+}
+
+static void ixgbe_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	adapter->msg_enable = data;
+}
+
+static int ixgbe_get_regs_len(struct net_device *netdev)
+{
+#define IXGBE_REGS_LEN  1129
+	return IXGBE_REGS_LEN * sizeof(u32);
+}
+
+#define IXGBE_GET_STAT(_A_, _R_)	(_A_->stats._R_)
+
+
+static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+			   void *p)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	printk(KERN_DEBUG "ixgbe_get_regs_1\n");
+	memset(p, 0, IXGBE_REGS_LEN * sizeof(u32));
+	printk(KERN_DEBUG "ixgbe_get_regs_2 0x%p\n", hw->hw_addr);
+
+	regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL);
+	printk(KERN_DEBUG "ixgbe_get_regs_3\n");
+	regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS);
+	regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+	regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP);
+	regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP);
+	regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER);
+	regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER);
+
+	printk(KERN_DEBUG "ixgbe_get_regs_4\n");
+
+	/* NVM Register */
+	regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC);
+	regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD);
+	regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA);
+	regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL);
+	regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA);
+	regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL);
+	regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA);
+	regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT);
+	regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP);
+	regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC);
+
+	/* Interrupt */
+	/* don't read EICR because it can clear interrupt causes, instead
+	 * read EICS which is a shadow but doesn't clear EICR */
+	regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS);
+	regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS);
+	regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS);
+	regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC);
+	regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC);
+	regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM);
+	regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0));
+	regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0));
+	regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT);
+	regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA);
+	regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0));
+	regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE);
+
+	/* Flow Control */
+	regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP);
+	regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0));
+	regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1));
+	regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2));
+	regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3));
+	for (i = 0; i < 8; i++) {
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i));
+			regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i));
+			break;
+		case ixgbe_mac_82599EB:
+		case ixgbe_mac_X540:
+			regs_buff[35 + i] = IXGBE_READ_REG(hw,
+							  IXGBE_FCRTL_82599(i));
+			regs_buff[43 + i] = IXGBE_READ_REG(hw,
+							  IXGBE_FCRTH_82599(i));
+			break;
+		default:
+			break;
+		}
+	}
+	regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV);
+	regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS);
+
+	/* Receive DMA */
+	for (i = 0; i < 64; i++)
+		regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+	regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
+	regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN);
+
+	/* Receive */
+	regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+	regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+	for (i = 0; i < 16; i++)
+		regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i));
+	regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0));
+	regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL);
+	regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i));
+	regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP);
+
+	/* Transmit */
+	for (i = 0; i < 32; i++)
+		regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i));
+	regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+	for (i = 0; i < 16; i++)
+		regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+	regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG);
+	for (i = 0; i < 8; i++)
+		regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i));
+	regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP);
+
+	/* Wake Up */
+	regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC);
+	regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC);
+	regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS);
+	regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV);
+	regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT);
+	regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT);
+	regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL);
+	regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM);
+	regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0));
+
+	/* DCB */
+	regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+	regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
+	regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR);
+	for (i = 0; i < 8; i++)
+		regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i));
+
+	/* Statistics */
+	regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs);
+	regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc);
+	regs_buff[883] = IXGBE_GET_STAT(adapter, errbc);
+	regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc);
+	for (i = 0; i < 8; i++)
+		regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]);
+	regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc);
+	regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc);
+	regs_buff[895] = IXGBE_GET_STAT(adapter, rlec);
+	regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc);
+	regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc);
+	regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc);
+	regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc);
+	for (i = 0; i < 8; i++)
+		regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]);
+	for (i = 0; i < 8; i++)
+		regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]);
+	for (i = 0; i < 8; i++)
+		regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]);
+	for (i = 0; i < 8; i++)
+		regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]);
+	regs_buff[932] = IXGBE_GET_STAT(adapter, prc64);
+	regs_buff[933] = IXGBE_GET_STAT(adapter, prc127);
+	regs_buff[934] = IXGBE_GET_STAT(adapter, prc255);
+	regs_buff[935] = IXGBE_GET_STAT(adapter, prc511);
+	regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023);
+	regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522);
+	regs_buff[938] = IXGBE_GET_STAT(adapter, gprc);
+	regs_buff[939] = IXGBE_GET_STAT(adapter, bprc);
+	regs_buff[940] = IXGBE_GET_STAT(adapter, mprc);
+	regs_buff[941] = IXGBE_GET_STAT(adapter, gptc);
+	regs_buff[942] = IXGBE_GET_STAT(adapter, gorc);
+	regs_buff[944] = IXGBE_GET_STAT(adapter, gotc);
+	for (i = 0; i < 8; i++)
+		regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]);
+	regs_buff[954] = IXGBE_GET_STAT(adapter, ruc);
+	regs_buff[955] = IXGBE_GET_STAT(adapter, rfc);
+	regs_buff[956] = IXGBE_GET_STAT(adapter, roc);
+	regs_buff[957] = IXGBE_GET_STAT(adapter, rjc);
+	regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc);
+	regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc);
+	regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc);
+	regs_buff[961] = IXGBE_GET_STAT(adapter, tor);
+	regs_buff[963] = IXGBE_GET_STAT(adapter, tpr);
+	regs_buff[964] = IXGBE_GET_STAT(adapter, tpt);
+	regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64);
+	regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127);
+	regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255);
+	regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511);
+	regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023);
+	regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522);
+	regs_buff[971] = IXGBE_GET_STAT(adapter, mptc);
+	regs_buff[972] = IXGBE_GET_STAT(adapter, bptc);
+	regs_buff[973] = IXGBE_GET_STAT(adapter, xec);
+	for (i = 0; i < 16; i++)
+		regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]);
+	for (i = 0; i < 16; i++)
+		regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]);
+	for (i = 0; i < 16; i++)
+		regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]);
+	for (i = 0; i < 16; i++)
+		regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]);
+
+	/* MAC */
+	regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG);
+	regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+	regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+	regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0);
+	regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1);
+	regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+	regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+	regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP);
+	regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP);
+	regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1);
+	regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP);
+	regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA);
+	regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE);
+	regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD);
+	regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS);
+	regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA);
+	regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+	regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD);
+	regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD);
+	regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG);
+	regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1);
+	regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2);
+	regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS);
+	regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC);
+	regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS);
+	regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS);
+	regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3);
+	regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+	regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2);
+	regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+
+	/* Diagnostic */
+	regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i));
+	regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN);
+	for (i = 0; i < 4; i++)
+		regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i));
+	regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE);
+	regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i));
+	regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN);
+	for (i = 0; i < 4; i++)
+		regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i));
+	regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE);
+	regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL);
+	regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0);
+	regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1);
+	regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2);
+	regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3);
+	regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL);
+	regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0);
+	regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1);
+	regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2);
+	regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3);
+	for (i = 0; i < 8; i++)
+		regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i));
+	regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL);
+	regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1);
+	regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2);
+	regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1);
+	regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2);
+	regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS);
+	regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL);
+	regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC);
+	regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC);
+
+	/* 82599 X540 specific registers  */
+	regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+}
+
+static int ixgbe_get_eeprom_len(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	return adapter->hw.eeprom.word_size * 2;
+}
+
+static int ixgbe_get_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	int first_word, last_word, eeprom_len;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_len = last_word - first_word + 1;
+
+	eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ret_val = ixgbe_read_eeprom_buffer(hw, first_word, eeprom_len,
+					   eeprom_buff);
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < eeprom_len; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int ixgbe_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EINVAL;
+
+	max_len = hw->eeprom.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/*
+		 * need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = ixgbe_read_eeprom(hw, first_word, &eeprom_buff[0]);
+		if (ret_val)
+			goto err;
+
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+		/*
+		 * need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = ixgbe_read_eeprom(hw, last_word,
+					  &eeprom_buff[last_word - first_word]);
+		if (ret_val)
+			goto err;
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = ixgbe_write_eeprom_buffer(hw, first_word,
+					    last_word - first_word + 1,
+					    eeprom_buff);
+
+	/* Update the checksum */
+	if (ret_val == 0)
+		ixgbe_update_eeprom_checksum(hw);
+
+err:
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void ixgbe_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver));
+
+	strlcpy(drvinfo->version, ixgbe_driver_version,
+				sizeof(drvinfo->version));
+
+	strlcpy(drvinfo->fw_version, adapter->eeprom_id,
+				sizeof(drvinfo->fw_version));
+
+	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
+				sizeof(drvinfo->bus_info));
+
+	drvinfo->n_stats = IXGBE_STATS_LEN;
+	drvinfo->testinfo_len = IXGBE_TEST_LEN;
+	drvinfo->regdump_len = ixgbe_get_regs_len(netdev);
+}
+
+static void ixgbe_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IXGBE_MAX_RXD;
+	ring->tx_max_pending = IXGBE_MAX_TXD;
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static int ixgbe_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_ring *tx_ring = NULL, *rx_ring = NULL;
+	u32 new_rx_count, new_tx_count;
+	int i, err = 0;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_tx_count = clamp_t(u32, ring->tx_pending,
+			       IXGBE_MIN_TXD, IXGBE_MAX_TXD);
+	new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	new_rx_count = clamp_t(u32, ring->rx_pending,
+			       IXGBE_MIN_RXD, IXGBE_MAX_RXD);
+	new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	/* if nothing to do return success */
+	if ((new_tx_count == adapter->tx_ring_count) &&
+	    (new_rx_count == adapter->rx_ring_count))
+		return 0;
+
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	/* alloc updated Tx resources */
+	if (new_tx_count != adapter->tx_ring_count) {
+		tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
+		if (!tx_ring) {
+			err = -ENOMEM;
+			goto clear_reset;
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			/* clone ring and setup updated count */
+			tx_ring[i] = *adapter->tx_ring[i];
+			tx_ring[i].count = new_tx_count;
+			err = ixgbe_setup_tx_resources(&tx_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbe_free_tx_resources(&tx_ring[i]);
+				}
+
+				vfree(tx_ring);
+				tx_ring = NULL;
+
+				goto clear_reset;
+			}
+		}
+	}
+
+	/* alloc updated Rx resources */
+	if (new_rx_count != adapter->rx_ring_count) {
+		rx_ring = vmalloc(adapter->num_rx_queues * sizeof(*rx_ring));
+		if (!rx_ring) {
+			err = -ENOMEM;
+			goto clear_reset;
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			/* clone ring and setup updated count */
+			rx_ring[i] = *adapter->rx_ring[i];
+			rx_ring[i].count = new_rx_count;
+			err = ixgbe_setup_rx_resources(&rx_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbe_free_rx_resources(&rx_ring[i]);
+				}
+
+				vfree(rx_ring);
+				rx_ring = NULL;
+
+				goto clear_reset;
+			}
+		}
+	}
+
+	/* bring interface down to prepare for update */
+	ixgbe_down(adapter);
+
+	/* Tx */
+	if (tx_ring) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			ixgbe_free_tx_resources(adapter->tx_ring[i]);
+			*adapter->tx_ring[i] = tx_ring[i];
+		}
+		adapter->tx_ring_count = new_tx_count;
+
+		vfree(tx_ring);
+		tx_ring = NULL;
+	}
+
+	/* Rx */
+	if (rx_ring) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			ixgbe_free_rx_resources(adapter->rx_ring[i]);
+			*adapter->rx_ring[i] = rx_ring[i];
+		}
+		adapter->rx_ring_count = new_rx_count;
+
+		vfree(rx_ring);
+		rx_ring = NULL;
+	}
+
+	/* restore interface using new values */
+	ixgbe_up(adapter);
+
+clear_reset:
+	/* free Tx resources if Rx error is encountered */
+	if (tx_ring) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			ixgbe_free_tx_resources(&tx_ring[i]);
+		vfree(tx_ring);
+	}
+
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+	return err;
+}
+
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+static int ixgbe_get_stats_count(struct net_device *netdev)
+{
+	return IXGBE_STATS_LEN;
+}
+
+#else /* HAVE_ETHTOOL_GET_SSET_COUNT */
+static int ixgbe_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return IXGBE_TEST_LEN;
+	case ETH_SS_STATS:
+		return IXGBE_STATS_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+static void ixgbe_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+	struct net_device_stats *net_stats = &netdev->stats;
+#else
+	struct net_device_stats *net_stats = &adapter->net_stats;
+#endif
+	u64 *queue_stat;
+	int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64);
+	int i, j, k;
+	char *p;
+
+	printk(KERN_DEBUG "ixgbe_stats 0\n");
+	ixgbe_update_stats(adapter);
+	printk(KERN_DEBUG "ixgbe_stats 1\n");
+
+	for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) {
+		p = (char *)net_stats + ixgbe_gstrings_net_stats[i].stat_offset;
+		data[i] = (ixgbe_gstrings_net_stats[i].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < IXGBE_GLOBAL_STATS_LEN; j++, i++) {
+		p = (char *)adapter + ixgbe_gstrings_stats[j].stat_offset;
+		data[i] = (ixgbe_gstrings_stats[j].sizeof_stat ==
+			   sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	printk(KERN_DEBUG "ixgbe_stats 2\n");
+#ifdef NO_VNIC
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		queue_stat = (u64 *)&adapter->tx_ring[j]->stats;
+		for (k = 0; k < stat_count; k++)
+			data[i + k] = queue_stat[k];
+		i += k;
+	}
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		queue_stat = (u64 *)&adapter->rx_ring[j]->stats;
+		for (k = 0; k < stat_count; k++)
+			data[i + k] = queue_stat[k];
+		i += k;
+	}
+	printk(KERN_DEBUG "ixgbe_stats 3\n");
+#endif
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) {
+			data[i++] = adapter->stats.pxontxc[j];
+			data[i++] = adapter->stats.pxofftxc[j];
+		}
+		for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) {
+			data[i++] = adapter->stats.pxonrxc[j];
+			data[i++] = adapter->stats.pxoffrxc[j];
+		}
+	}
+	printk(KERN_DEBUG "ixgbe_stats 4\n");
+	stat_count = sizeof(struct vf_stats) / sizeof(u64);
+	for (j = 0; j < adapter->num_vfs; j++) {
+		queue_stat = (u64 *)&adapter->vfinfo[j].vfstats;
+		for (k = 0; k < stat_count; k++)
+			data[i + k] = queue_stat[k];
+		queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats;
+		for (k = 0; k < stat_count; k++)
+			data[i + k] += queue_stat[k];
+		i += k;
+	}
+}
+
+static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
+			      u8 *data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	char *p = (char *)data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, *ixgbe_gstrings_test,
+			IXGBE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) {
+			memcpy(p, ixgbe_gstrings_net_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, ixgbe_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			sprintf(p, "tx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "tx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			sprintf(p, "rx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+				sprintf(p, "tx_pb_%u_pxon", i);
+				p += ETH_GSTRING_LEN;
+				sprintf(p, "tx_pb_%u_pxoff", i);
+				p += ETH_GSTRING_LEN;
+			}
+			for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) {
+				sprintf(p, "rx_pb_%u_pxon", i);
+				p += ETH_GSTRING_LEN;
+				sprintf(p, "rx_pb_%u_pxoff", i);
+				p += ETH_GSTRING_LEN;
+			}
+		}
+		for (i = 0; i < adapter->num_vfs; i++) {
+			sprintf(p, "VF %d Rx Packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "VF %d Rx Bytes", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "VF %d Tx Packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "VF %d Tx Bytes", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "VF %d MC Packets", i);
+			p += ETH_GSTRING_LEN;
+		}
+		/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	}
+}
+
+static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool link_up;
+	u32 link_speed = 0;
+	*data = 0;
+
+	hw->mac.ops.check_link(hw, &link_speed, &link_up, true);
+	if (link_up)
+		return *data;
+	else
+		*data = 1;
+	return *data;
+}
+
+/* ethtool register test data */
+struct ixgbe_reg_test {
+	u16 reg;
+	u8  array_len;
+	u8  test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define WRITE_NO_TEST	3
+#define TABLE32_TEST	4
+#define TABLE64_TEST_LO	5
+#define TABLE64_TEST_HI	6
+
+/* default 82599 register test */
+static struct ixgbe_reg_test reg_test_82599[] = {
+	{ IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+	{ IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 },
+	{ IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+	{ IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
+	{ IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 },
+	{ IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF },
+	{ IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* default 82598 register test */
+static struct ixgbe_reg_test reg_test_82598[] = {
+	{ IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+	{ IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* Enable all four RX queues before testing. */
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+	/* RDH is read-only for 82598, only test RDT. */
+	{ IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
+	{ IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF },
+	{ IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 },
+	{ IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF },
+	{ IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+#define REG_PATTERN_TEST(R, M, W)					      \
+{									      \
+	u32 pat, val, before;						      \
+	const u32 _test[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \
+	for (pat = 0; pat < ARRAY_SIZE(_test); pat++) {			      \
+		before = readl(adapter->hw.hw_addr + R);		      \
+		writel((_test[pat] & W), (adapter->hw.hw_addr + R));	      \
+		val = readl(adapter->hw.hw_addr + R);			      \
+		if (val != (_test[pat] & W & M)) {			      \
+			e_err(drv, "pattern test reg %04X failed: got "	      \
+			      "0x%08X expected 0x%08X\n",		      \
+				R, val, (_test[pat] & W & M));		      \
+			*data = R;					      \
+			writel(before, adapter->hw.hw_addr + R);	      \
+			return 1;					      \
+		}							      \
+		writel(before, adapter->hw.hw_addr + R);		      \
+	}								      \
+}
+
+#define REG_SET_AND_CHECK(R, M, W)					      \
+{									      \
+	u32 val, before;						      \
+	before = readl(adapter->hw.hw_addr + R);			      \
+	writel((W & M), (adapter->hw.hw_addr + R));			      \
+	val = readl(adapter->hw.hw_addr + R);				      \
+	if ((W & M) != (val & M)) {					      \
+		e_err(drv, "set/check reg %04X test failed: got 0x%08X "      \
+		      "expected 0x%08X\n", R, (val & M), (W & M));	      \
+		*data = R;						      \
+		writel(before, (adapter->hw.hw_addr + R));		      \
+		return 1;						      \
+	}								      \
+	writel(before, (adapter->hw.hw_addr + R));			      \
+}
+
+static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	struct ixgbe_reg_test *test;
+	u32 value, status_before, status_after;
+	u32 i, toggle;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		toggle = 0x7FFFF3FF;
+		test = reg_test_82598;
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		toggle = 0x7FFFF30F;
+		test = reg_test_82599;
+		break;
+	default:
+		*data = 1;
+		return 1;
+		break;
+	}
+
+	/*
+	 * Because the status register is such a special case,
+	 * we handle it separately from the rest of the register
+	 * tests.  Some bits are read-only, some toggle, and some
+	 * are writeable on newer MACs.
+	 */
+	status_before = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS);
+	value = (IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle);
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle);
+	status_after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle;
+	if (value != status_after) {
+		e_err(drv, "failed STATUS register test got: "
+		      "0x%08X expected: 0x%08X\n", status_after, value);
+		*data = 1;
+		return 1;
+	}
+	/* restore previous status */
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, status_before);
+
+	/*
+	 * Perform the remainder of the register test, looping through
+	 * the test table until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				REG_PATTERN_TEST(test->reg + (i * 0x40),
+						test->mask,
+						test->write);
+				break;
+			case SET_READ_TEST:
+				REG_SET_AND_CHECK(test->reg + (i * 0x40),
+						test->mask,
+						test->write);
+				break;
+			case WRITE_NO_TEST:
+				writel(test->write,
+				       (adapter->hw.hw_addr + test->reg)
+				       + (i * 0x40));
+				break;
+			case TABLE32_TEST:
+				REG_PATTERN_TEST(test->reg + (i * 4),
+						test->mask,
+						test->write);
+				break;
+			case TABLE64_TEST_LO:
+				REG_PATTERN_TEST(test->reg + (i * 8),
+						test->mask,
+						test->write);
+				break;
+			case TABLE64_TEST_HI:
+				REG_PATTERN_TEST((test->reg + 4) + (i * 8),
+						test->mask,
+						test->write);
+				break;
+			}
+		}
+		test++;
+	}
+
+	*data = 0;
+	return 0;
+}
+
+static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL))
+		*data = 1;
+	else
+		*data = 0;
+	return *data;
+}
+
+static irqreturn_t ixgbe_test_intr(int irq, void *data)
+{
+	struct net_device *netdev = (struct net_device *) data;
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR);
+
+	return IRQ_HANDLED;
+}
+
+static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	struct net_device *netdev = adapter->netdev;
+	u32 mask, i = 0, shared_int = true;
+	u32 irq = adapter->pdev->irq;
+
+	*data = 0;
+
+	/* Hook up test interrupt handler just for this test */
+	if (adapter->msix_entries) {
+		/* NOTE: we don't test MSI-X interrupts here, yet */
+		return 0;
+	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
+		shared_int = false;
+		if (request_irq(irq, &ixgbe_test_intr, 0, netdev->name,
+				netdev)) {
+			*data = 1;
+			return -1;
+		}
+	} else if (!request_irq(irq, &ixgbe_test_intr, IRQF_PROBE_SHARED,
+				netdev->name, netdev)) {
+		shared_int = false;
+	} else if (request_irq(irq, &ixgbe_test_intr, IRQF_SHARED,
+			       netdev->name, netdev)) {
+		*data = 1;
+		return -1;
+	}
+	e_info(hw, "testing %s interrupt\n",
+	       (shared_int ? "shared" : "unshared"));
+
+	/* Disable all the interrupts */
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
+	IXGBE_WRITE_FLUSH(&adapter->hw);
+	usleep_range(10000, 20000);
+
+	/* Test each interrupt */
+	for (; i < 10; i++) {
+		/* Interrupt to test */
+		mask = 1 << i;
+
+		if (!shared_int) {
+			/*
+			 * Disable the interrupts to be reported in
+			 * the cause register and then force the same
+			 * interrupt and see if one gets posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_FLUSH(&adapter->hw);
+			usleep_range(10000, 20000);
+
+			if (adapter->test_icr & mask) {
+				*data = 3;
+				break;
+			}
+		}
+
+		/*
+		 * Enable the interrupt to be reported in the cause
+		 * register and then force the same interrupt and see
+		 * if one gets posted.  If an interrupt was not posted
+		 * to the bus, the test failed.
+		 */
+		adapter->test_icr = 0;
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
+		IXGBE_WRITE_FLUSH(&adapter->hw);
+		usleep_range(10000, 20000);
+
+		if (!(adapter->test_icr & mask)) {
+			*data = 4;
+			break;
+		}
+
+		if (!shared_int) {
+			/*
+			 * Disable the other interrupts to be reported in
+			 * the cause register and then force the other
+			 * interrupts and see if any get posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_FLUSH(&adapter->hw);
+			usleep_range(10000, 20000);
+
+			if (adapter->test_icr) {
+				*data = 5;
+				break;
+			}
+		}
+	}
+
+	/* Disable all the interrupts */
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
+	IXGBE_WRITE_FLUSH(&adapter->hw);
+	usleep_range(10000, 20000);
+
+	/* Unhook test interrupt handler */
+	free_irq(irq, netdev);
+
+	return *data;
+}
+
+
+
+static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg_data;
+
+	/* X540 needs to set the MACC.FLU bit to force link up */
+	if (adapter->hw.mac.type == ixgbe_mac_X540) {
+		reg_data = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg_data |= IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data);
+	}
+
+	/* right now we only support MAC loopback in the driver */
+	reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	/* Setup MAC loopback */
+	reg_data |= IXGBE_HLREG0_LPBK;
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data);
+
+	reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE;
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data);
+
+	reg_data = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	reg_data &= ~IXGBE_AUTOC_LMS_MASK;
+	reg_data |= IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU;
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data);
+	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(10000, 20000);
+
+	/* Disable Atlas Tx lanes; re-enabled in reset path */
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		u8 atlas;
+
+		ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_REG_EN;
+		ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas);
+
+		ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
+		ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas);
+
+		ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
+		ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas);
+
+		ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
+		ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas);
+	}
+
+	return 0;
+}
+
+static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter)
+{
+	u32 reg_data;
+
+	reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0);
+	reg_data &= ~IXGBE_HLREG0_LPBK;
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data);
+}
+
+
+
+
+
+
+static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+
+	//*data = ixgbe_setup_desc_rings(adapter);
+	//if (*data)
+	//	goto out;
+	*data = ixgbe_setup_loopback_test(adapter);
+	if (*data)
+		goto err_loopback;
+	//*data = ixgbe_run_loopback_test(adapter);
+	ixgbe_loopback_cleanup(adapter);
+
+err_loopback:
+	//ixgbe_free_desc_rings(adapter);
+//out:
+	return *data;
+
+}
+
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+static int ixgbe_diag_test_count(struct net_device *netdev)
+{
+	return IXGBE_TEST_LEN;
+}
+
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+static void ixgbe_diag_test(struct net_device *netdev,
+			    struct ethtool_test *eth_test, u64 *data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+
+	set_bit(__IXGBE_TESTING, &adapter->state);
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+
+		e_info(hw, "offline testing starting\n");
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result */
+		if (ixgbe_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+			int i;
+			for (i = 0; i < adapter->num_vfs; i++) {
+				if (adapter->vfinfo[i].clear_to_send) {
+					e_warn(drv, "Please take active VFS "
+					       "offline and restart the "
+					       "adapter before running NIC "
+					       "diagnostics\n");
+					data[0] = 1;
+					data[1] = 1;
+					data[2] = 1;
+					data[3] = 1;
+					eth_test->flags |= ETH_TEST_FL_FAILED;
+					clear_bit(__IXGBE_TESTING,
+						  &adapter->state);
+					goto skip_ol_tests;
+				}
+			}
+		}
+
+		if (if_running)
+			/* indicate we're in test mode */
+			dev_close(netdev);
+		else
+			ixgbe_reset(adapter);
+
+		e_info(hw, "register testing starting\n");
+		if (ixgbe_reg_test(adapter, &data[0]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		ixgbe_reset(adapter);
+		e_info(hw, "eeprom testing starting\n");
+		if (ixgbe_eeprom_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		ixgbe_reset(adapter);
+		e_info(hw, "interrupt testing starting\n");
+		if (ixgbe_intr_test(adapter, &data[2]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* If SRIOV or VMDq is enabled then skip MAC
+		 * loopback diagnostic. */
+		if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
+				      IXGBE_FLAG_VMDQ_ENABLED)) {
+			e_info(hw, "skip MAC loopback diagnostic in VT mode\n");
+			data[3] = 0;
+			goto skip_loopback;
+		}
+
+		ixgbe_reset(adapter);
+		e_info(hw, "loopback testing starting\n");
+		if (ixgbe_loopback_test(adapter, &data[3]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+skip_loopback:
+		ixgbe_reset(adapter);
+
+		clear_bit(__IXGBE_TESTING, &adapter->state);
+		if (if_running)
+			dev_open(netdev);
+	} else {
+		e_info(hw, "online testing starting\n");
+		/* Online tests */
+		if (ixgbe_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Online tests aren't run; pass by default */
+		data[0] = 0;
+		data[1] = 0;
+		data[2] = 0;
+		data[3] = 0;
+
+		clear_bit(__IXGBE_TESTING, &adapter->state);
+	}
+skip_ol_tests:
+	msleep_interruptible(4 * 1000);
+}
+
+static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter,
+			       struct ethtool_wolinfo *wol)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int retval = 1;
+	u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
+
+	/* WOL not supported except for the following */
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82599_SFP:
+		/* Only these subdevice could supports WOL */
+		switch (hw->subsystem_device_id) {
+		case IXGBE_SUBDEV_ID_82599_560FLR:
+			/* only support first port */
+			if (hw->bus.func != 0) {
+				wol->supported = 0;
+				break;
+			}
+		case IXGBE_SUBDEV_ID_82599_SFP:
+			retval = 0;
+			break;
+		default:
+			wol->supported = 0;
+			break;
+		}
+		break;
+	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+		/* All except this subdevice support WOL */
+		if (hw->subsystem_device_id ==
+		    IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) {
+			wol->supported = 0;
+			break;
+		}
+		retval = 0;
+		break;
+	case IXGBE_DEV_ID_82599_KX4:
+		retval = 0;
+		break;
+	case IXGBE_DEV_ID_X540T:
+		/* check eeprom to see if enabled wol */
+		if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
+		    ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
+		     (hw->bus.func == 0))) {
+			retval = 0;
+			break;
+		}
+
+		/* All others not supported */
+		wol->supported = 0;
+		break;
+	default:
+		wol->supported = 0;
+	}
+	return retval;
+}
+
+static void ixgbe_get_wol(struct net_device *netdev,
+			  struct ethtool_wolinfo *wol)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST |
+			 WAKE_BCAST | WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	if (ixgbe_wol_exclusion(adapter, wol) ||
+	    !device_can_wakeup(&adapter->pdev->dev))
+		return;
+
+	if (adapter->wol & IXGBE_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & IXGBE_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & IXGBE_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & IXGBE_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+}
+
+static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE))
+		return -EOPNOTSUPP;
+
+	if (ixgbe_wol_exclusion(adapter, wol))
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= IXGBE_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= IXGBE_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= IXGBE_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= IXGBE_WUFC_MAG;
+
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+static int ixgbe_nway_reset(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+
+	return 0;
+}
+
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+static int ixgbe_set_phys_id(struct net_device *netdev,
+			     enum ethtool_phys_id_state state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+		return 2;
+
+	case ETHTOOL_ID_ON:
+		hw->mac.ops.led_on(hw, IXGBE_LED_ON);
+		break;
+
+	case ETHTOOL_ID_OFF:
+		hw->mac.ops.led_off(hw, IXGBE_LED_ON);
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		/* Restore LED settings */
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg);
+		break;
+	}
+
+	return 0;
+}
+#else
+static int ixgbe_phys_id(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	u32 i;
+
+	if (!data || data > 300)
+		data = 300;
+
+	for (i = 0; i < (data * 1000); i += 400) {
+		ixgbe_led_on(hw, IXGBE_LED_ON);
+		msleep_interruptible(200);
+		ixgbe_led_off(hw, IXGBE_LED_ON);
+		msleep_interruptible(200);
+	}
+
+	/* Restore LED settings */
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+
+	return 0;
+}
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+
+static int ixgbe_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+#ifndef CONFIG_IXGBE_NAPI
+	ec->rx_max_coalesced_frames_irq = adapter->rx_work_limit;
+#endif /* CONFIG_IXGBE_NAPI */
+	/* only valid if in constant ITR mode */
+	if (adapter->rx_itr_setting <= 1)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	/* if in mixed tx/rx queues per vector mode, report only rx settings */
+	if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+		return 0;
+
+	/* only valid if in constant ITR mode */
+	if (adapter->tx_itr_setting <= 1)
+		ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+	else
+		ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+
+	return 0;
+}
+
+/*
+ * this function must be called before setting the new value of
+ * rx_itr_setting
+ */
+#ifdef NO_VNIC
+static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	/* nothing to do if LRO or RSC are not enabled */
+	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) ||
+	    !(netdev->features & NETIF_F_LRO))
+		return false;
+
+	/* check the feature flag value and enable RSC if necessary */
+	if (adapter->rx_itr_setting == 1 ||
+	    adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
+		if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
+			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+			e_info(probe, "rx-usecs value high enough "
+				      "to re-enable RSC\n");
+			return true;
+		}
+	/* if interrupt rate is too high then disable RSC */
+	} else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+		adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+#ifdef IXGBE_NO_LRO
+		e_info(probe, "rx-usecs set too low, disabling RSC\n");
+#else
+		e_info(probe, "rx-usecs set too low, "
+			      "falling back to software LRO\n");
+#endif
+		return true;
+	}
+	return false;
+}
+#endif
+
+static int ixgbe_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec)
+{
+#ifdef NO_VNIC
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_q_vector *q_vector;
+	int i;
+	int num_vectors;
+	u16 tx_itr_param, rx_itr_param;
+	bool need_reset = false;
+
+	/* don't accept tx specific changes if we've got mixed RxTx vectors */
+	if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count
+	    && ec->tx_coalesce_usecs)
+		return -EINVAL;
+
+	if (ec->tx_max_coalesced_frames_irq)
+		adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
+#ifndef CONFIG_IXGBE_NAPI
+	if (ec->rx_max_coalesced_frames_irq)
+		adapter->rx_work_limit = ec->rx_max_coalesced_frames_irq;
+
+#endif
+	if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) ||
+	    (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)))
+		return -EINVAL;
+
+	if (ec->rx_coalesce_usecs > 1)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+
+	if (adapter->rx_itr_setting == 1)
+		rx_itr_param = IXGBE_20K_ITR;
+	else
+		rx_itr_param = adapter->rx_itr_setting;
+
+	if (ec->tx_coalesce_usecs > 1)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+
+	if (adapter->tx_itr_setting == 1)
+		tx_itr_param = IXGBE_10K_ITR;
+	else
+		tx_itr_param = adapter->tx_itr_setting;
+
+	/* check the old value and enable RSC if necessary */
+	need_reset = ixgbe_update_rsc(adapter);
+
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+		num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	else
+		num_vectors = 1;
+
+	for (i = 0; i < num_vectors; i++) {
+		q_vector = adapter->q_vector[i];
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		q_vector->rx.work_limit = adapter->rx_work_limit;
+		if (q_vector->tx.count && !q_vector->rx.count)
+			/* tx only */
+			q_vector->itr = tx_itr_param;
+		else
+			/* rx only or mixed */
+			q_vector->itr = rx_itr_param;
+		ixgbe_write_eitr(q_vector);
+	}
+
+	/*
+	 * do reset here at the end to make sure EITR==0 case is handled
+	 * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
+	 * also locks in RSC enable/disable which requires reset
+	 */
+	if (need_reset)
+		ixgbe_do_reset(netdev);
+#endif
+	return 0;
+}
+
+#ifndef HAVE_NDO_SET_FEATURES
+static u32 ixgbe_get_rx_csum(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_ring *ring = adapter->rx_ring[0];
+	return test_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
+}
+
+static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[i];
+		if (data)
+			set_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
+		else
+			clear_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state);
+	}
+
+	/* LRO and RSC both depend on RX checksum to function */
+	if (!data && (netdev->features & NETIF_F_LRO)) {
+		netdev->features &= ~NETIF_F_LRO;
+
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+			adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+			ixgbe_do_reset(netdev);
+		}
+	}
+
+	return 0;
+}
+
+static u32 ixgbe_get_tx_csum(struct net_device *netdev)
+{
+	return (netdev->features & NETIF_F_IP_CSUM) != 0;
+}
+
+static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	u32 feature_list;
+
+#ifdef NETIF_F_IPV6_CSUM
+	feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+#else
+	feature_list = NETIF_F_IP_CSUM;
+#endif
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		feature_list |= NETIF_F_SCTP_CSUM;
+		break;
+	default:
+		break;
+	}
+	if (data)
+		netdev->features |= feature_list;
+	else
+		netdev->features &= ~feature_list;
+
+	return 0;
+}
+
+#ifdef NETIF_F_TSO
+static int ixgbe_set_tso(struct net_device *netdev, u32 data)
+{
+	if (data) {
+		netdev->features |= NETIF_F_TSO;
+#ifdef NETIF_F_TSO6
+		netdev->features |= NETIF_F_TSO6;
+#endif
+	} else {
+#ifndef HAVE_NETDEV_VLAN_FEATURES
+#ifdef NETIF_F_HW_VLAN_TX
+		struct ixgbe_adapter *adapter = netdev_priv(netdev);
+		/* disable TSO on all VLANs if they're present */
+		if (adapter->vlgrp) {
+			int i;
+			struct net_device *v_netdev;
+			for (i = 0; i < VLAN_N_VID; i++) {
+				v_netdev =
+				       vlan_group_get_device(adapter->vlgrp, i);
+				if (v_netdev) {
+					v_netdev->features &= ~NETIF_F_TSO;
+#ifdef NETIF_F_TSO6
+					v_netdev->features &= ~NETIF_F_TSO6;
+#endif
+					vlan_group_set_device(adapter->vlgrp, i,
+							      v_netdev);
+				}
+			}
+		}
+#endif
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+		netdev->features &= ~NETIF_F_TSO;
+#ifdef NETIF_F_TSO6
+		netdev->features &= ~NETIF_F_TSO6;
+#endif
+	}
+	return 0;
+}
+
+#endif /* NETIF_F_TSO */
+#ifdef ETHTOOL_GFLAGS
+static int ixgbe_set_flags(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN;
+	u32 changed = netdev->features ^ data;
+	bool need_reset = false;
+	int rc;
+
+#ifndef HAVE_VLAN_RX_REGISTER
+	if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+	    !(data & ETH_FLAG_RXVLAN))
+		return -EINVAL;
+
+#endif
+#ifdef NETIF_F_RXHASH
+	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED)
+		supported_flags |= ETH_FLAG_RXHASH;
+#endif
+#ifdef IXGBE_NO_LRO
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
+#endif
+		supported_flags |= ETH_FLAG_LRO;
+
+#ifdef ETHTOOL_GRXRINGS
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X540:
+	case ixgbe_mac_82599EB:
+		supported_flags |= ETH_FLAG_NTUPLE;
+	default:
+		break;
+	}
+
+#endif
+	rc = ethtool_op_set_flags(netdev, data, supported_flags);
+	if (rc)
+		return rc;
+
+#ifndef HAVE_VLAN_RX_REGISTER
+	if (changed & ETH_FLAG_RXVLAN)
+		ixgbe_vlan_mode(netdev, netdev->features);
+
+#endif
+	/* if state changes we need to update adapter->flags and reset */
+	if (!(netdev->features & NETIF_F_LRO)) {
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
+			need_reset = true;
+		adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+	} else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
+		   !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
+		if (adapter->rx_itr_setting == 1 ||
+		    adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
+			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+			need_reset = true;
+		} else if (changed & ETH_FLAG_LRO) {
+#ifdef IXGBE_NO_LRO
+			e_info(probe, "rx-usecs set too low, "
+			       "disabling RSC\n");
+#else
+			e_info(probe, "rx-usecs set too low, "
+			       "falling back to software LRO\n");
+#endif
+		}
+	}
+
+#ifdef ETHTOOL_GRXRINGS
+	/*
+	 * Check if Flow Director n-tuple support was enabled or disabled.  If
+	 * the state changed, we need to reset.
+	 */
+	if (!(netdev->features & NETIF_F_NTUPLE)) {
+		if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+			/* turn off Flow Director, set ATR and reset */
+			if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) &&
+			    !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+				adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+			need_reset = true;
+		}
+		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+	} else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
+		/* turn off ATR, enable perfect filters and reset */
+		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+		adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+		need_reset = true;
+	}
+
+#endif /* ETHTOOL_GRXRINGS */
+	if (need_reset)
+		ixgbe_do_reset(netdev);
+
+	return 0;
+}
+
+#endif /* ETHTOOL_GFLAGS */
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_GRXRINGS
+static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	union ixgbe_atr_input *mask = &adapter->fdir_mask;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	/* fill out the flow spec entry */
+
+	/* set flow type field */
+	switch (rule->filter.formatted.flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+		fsp->flow_type = TCP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+		fsp->flow_type = UDP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+		fsp->flow_type = SCTP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_IPV4:
+		fsp->flow_type = IP_USER_FLOW;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port;
+	fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port;
+	fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port;
+	fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port;
+	fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0];
+	fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0];
+	fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id;
+	fsp->m_ext.vlan_tci = mask->formatted.vlan_id;
+	fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes;
+	fsp->m_ext.vlan_etype = mask->formatted.flex_bytes;
+	fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool);
+	fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool);
+	fsp->flow_type |= FLOW_EXT;
+
+	/* record action */
+	if (rule->action == IXGBE_FDIR_DROP_QUEUE)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->action;
+
+	return 0;
+}
+
+static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
+				      struct ethtool_rxnfc *cmd,
+				      u32 *rule_locs)
+{
+	struct hlist_node *node, *node2;
+	struct ixgbe_fdir_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter,
+				   struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* if RSS is disabled then report no hashing */
+	if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+		return 0;
+
+	/* Report default options for RSS on ixgbe */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case UDP_V4_FLOW:
+		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case UDP_V6_FLOW:
+		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+			   void *rule_locs)
+#else
+			   u32 *rule_locs)
+#endif
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->fdir_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
+						 (u32 *)rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ret = ixgbe_get_rss_hash_opts(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					   struct ixgbe_fdir_filter *input,
+					   u16 sw_idx)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node, *node2, *parent;
+	struct ixgbe_fdir_filter *rule;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry_safe(rule, node, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = node;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->sw_idx == sw_idx)) {
+		if (!input || (rule->filter.formatted.bkt_hash !=
+			       input->filter.formatted.bkt_hash)) {
+			err = ixgbe_fdir_erase_perfect_filter_82599(hw,
+								&rule->filter,
+								sw_idx);
+		}
+
+		hlist_del(&rule->fdir_node);
+		kfree(rule);
+		adapter->fdir_filter_count--;
+	}
+
+	/*
+	 * If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node and set software index */
+	INIT_HLIST_NODE(&input->fdir_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_after(parent, &input->fdir_node);
+	else
+		hlist_add_head(&input->fdir_node,
+			       &adapter->fdir_filter_list);
+
+	/* update counts */
+	adapter->fdir_filter_count++;
+
+	return 0;
+}
+
+static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp,
+				       u8 *flow_type)
+{
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+		break;
+	case UDP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+		break;
+	case SCTP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+		break;
+	case IP_USER_FLOW:
+		switch (fsp->h_u.usr_ip4_spec.proto) {
+		case IPPROTO_TCP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+			break;
+		case IPPROTO_UDP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+			break;
+		case IPPROTO_SCTP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+			break;
+		case 0:
+			if (!fsp->m_u.usr_ip4_spec.proto) {
+				*flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
+				break;
+			}
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_fdir_filter *input;
+	union ixgbe_atr_input mask;
+	int err;
+
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Don't allow programming if the action is a queue greater than
+	 * the number of online Rx queues.
+	 */
+	if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
+	    (fsp->ring_cookie >= adapter->num_rx_queues))
+		return -EINVAL;
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) {
+		e_err(drv, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	input = kzalloc(sizeof(*input), GFP_ATOMIC);
+	if (!input)
+		return -ENOMEM;
+
+	memset(&mask, 0, sizeof(union ixgbe_atr_input));
+
+	/* set SW index */
+	input->sw_idx = fsp->location;
+
+	/* record flow type */
+	if (!ixgbe_flowspec_to_flow_type(fsp,
+					 &input->filter.formatted.flow_type)) {
+		e_err(drv, "Unrecognized flow type\n");
+		goto err_out;
+	}
+
+	mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+				   IXGBE_ATR_L4TYPE_MASK;
+
+	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+		mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+
+	/* Copy input into formatted structures */
+	input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+	mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+	input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+	mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+	input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+	mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+	input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+	mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+
+	if (fsp->flow_type & FLOW_EXT) {
+		input->filter.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->h_ext.data[1]);
+		mask.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->m_ext.data[1]);
+		input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci;
+		mask.formatted.vlan_id = fsp->m_ext.vlan_tci;
+		input->filter.formatted.flex_bytes =
+						fsp->h_ext.vlan_etype;
+		mask.formatted.flex_bytes = fsp->m_ext.vlan_etype;
+	}
+
+	/* determine if we need to drop or route the packet */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+		input->action = IXGBE_FDIR_DROP_QUEUE;
+	else
+		input->action = fsp->ring_cookie;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (hlist_empty(&adapter->fdir_filter_list)) {
+		/* save mask and program input mask into HW */
+		memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
+		err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
+		if (err) {
+			e_err(drv, "Error writing mask\n");
+			goto err_out_w_lock;
+		}
+	} else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
+		e_err(drv, "Only one mask supported per port\n");
+		goto err_out_w_lock;
+	}
+
+	/* apply mask and compute/store hash */
+	ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+
+	/* program filters to filter memory */
+	err = ixgbe_fdir_write_perfect_filter_82599(hw,
+				&input->filter, input->sw_idx,
+				(input->action == IXGBE_FDIR_DROP_QUEUE) ?
+				IXGBE_FDIR_DROP_QUEUE :
+				adapter->rx_ring[input->action]->reg_idx);
+	if (err)
+		goto err_out_w_lock;
+
+	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	kfree(input);
+	return err;
+err_out_w_lock:
+	spin_unlock(&adapter->fdir_perfect_lock);
+err_out:
+	kfree(input);
+	return -EINVAL;
+}
+
+static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, (u16)(fsp->location));
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+}
+
+#ifdef ETHTOOL_SRXNTUPLE
+/*
+ * We need to keep this around for kernels 2.6.33 - 2.6.39 in order to avoid
+ * a null pointer dereference as it was assumend if the NETIF_F_NTUPLE flag
+ * was defined that this function was present.
+ */
+static int ixgbe_set_rx_ntuple(struct net_device *dev,
+			       struct ethtool_rx_ntuple *cmd)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif
+#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
+		       IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
+				  struct ethtool_rxnfc *nfc)
+{
+	u32 flags2 = adapter->flags2;
+
+	/*
+	 * RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags2 != adapter->flags2) {
+		struct ixgbe_hw *hw = &adapter->hw;
+		u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+
+		if ((flags2 & UDP_RSS_FLAGS) &&
+		    !(adapter->flags2 & UDP_RSS_FLAGS))
+			e_warn(drv, "enabling UDP RSS: fragmented packets"
+			       " may arrive out of order to the stack above\n");
+
+		adapter->flags2 = flags2;
+
+		/* Perform hash on these packet types */
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4
+		      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+		      | IXGBE_MRQC_RSS_FIELD_IPV6
+		      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
+			  IXGBE_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+			mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+			mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
+static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXFH:
+		ret = ixgbe_set_rss_hash_opt(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+#endif /* ETHTOOL_GRXRINGS */
+//static
+struct ethtool_ops ixgbe_ethtool_ops = {
+	.get_settings		= ixgbe_get_settings,
+	.set_settings		= ixgbe_set_settings,
+	.get_drvinfo		= ixgbe_get_drvinfo,
+	.get_regs_len		= ixgbe_get_regs_len,
+	.get_regs		= ixgbe_get_regs,
+	.get_wol		= ixgbe_get_wol,
+	.set_wol		= ixgbe_set_wol,
+	.nway_reset		= ixgbe_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= ixgbe_get_eeprom_len,
+	.get_eeprom		= ixgbe_get_eeprom,
+	.set_eeprom		= ixgbe_set_eeprom,
+	.get_ringparam		= ixgbe_get_ringparam,
+	.set_ringparam		= ixgbe_set_ringparam,
+	.get_pauseparam		= ixgbe_get_pauseparam,
+	.set_pauseparam		= ixgbe_set_pauseparam,
+	.get_msglevel		= ixgbe_get_msglevel,
+	.set_msglevel		= ixgbe_set_msglevel,
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+	.self_test_count	= ixgbe_diag_test_count,
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+	.self_test		= ixgbe_diag_test,
+	.get_strings		= ixgbe_get_strings,
+#ifdef HAVE_ETHTOOL_SET_PHYS_ID
+	.set_phys_id		= ixgbe_set_phys_id,
+#else
+	.phys_id		= ixgbe_phys_id,
+#endif /* HAVE_ETHTOOL_SET_PHYS_ID */
+#ifndef HAVE_ETHTOOL_GET_SSET_COUNT
+	.get_stats_count	= ixgbe_get_stats_count,
+#else /* HAVE_ETHTOOL_GET_SSET_COUNT */
+	.get_sset_count		= ixgbe_get_sset_count,
+#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */
+	.get_ethtool_stats      = ixgbe_get_ethtool_stats,
+#ifdef HAVE_ETHTOOL_GET_PERM_ADDR
+	.get_perm_addr		= ethtool_op_get_perm_addr,
+#endif
+	.get_coalesce		= ixgbe_get_coalesce,
+	.set_coalesce		= ixgbe_set_coalesce,
+#ifndef HAVE_NDO_SET_FEATURES
+	.get_rx_csum		= ixgbe_get_rx_csum,
+	.set_rx_csum		= ixgbe_set_rx_csum,
+	.get_tx_csum		= ixgbe_get_tx_csum,
+	.set_tx_csum		= ixgbe_set_tx_csum,
+	.get_sg			= ethtool_op_get_sg,
+	.set_sg			= ethtool_op_set_sg,
+#ifdef NETIF_F_TSO
+	.get_tso		= ethtool_op_get_tso,
+	.set_tso		= ixgbe_set_tso,
+#endif
+#ifdef ETHTOOL_GFLAGS
+	.get_flags		= ethtool_op_get_flags,
+	.set_flags		= ixgbe_set_flags,
+#endif
+#endif /* HAVE_NDO_SET_FEATURES */
+#ifdef ETHTOOL_GRXRINGS
+	.get_rxnfc		= ixgbe_get_rxnfc,
+	.set_rxnfc		= ixgbe_set_rxnfc,
+#ifdef ETHTOOL_SRXNTUPLE
+	.set_rx_ntuple		= ixgbe_set_rx_ntuple,
+#endif
+#endif
+};
+
+void ixgbe_set_ethtool_ops(struct net_device *netdev)
+{
+	SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops);
+}
+#endif /* SIOCETHTOOL */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
new file mode 100644
index 00000000..cad28622
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
@@ -0,0 +1,91 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_FCOE_H
+#define _IXGBE_FCOE_H
+
+#ifdef IXGBE_FCOE
+
+#include <scsi/fc/fc_fs.h>
+#include <scsi/fc/fc_fcoe.h>
+
+/* shift bits within STAT fo FCSTAT */
+#define IXGBE_RXDADV_FCSTAT_SHIFT	4
+
+/* ddp user buffer */
+#define IXGBE_BUFFCNT_MAX	256	/* 8 bits bufcnt */
+#define IXGBE_FCPTR_ALIGN	16
+#define IXGBE_FCPTR_MAX		(IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t))
+#define IXGBE_FCBUFF_4KB	0x0
+#define IXGBE_FCBUFF_8KB	0x1
+#define IXGBE_FCBUFF_16KB	0x2
+#define IXGBE_FCBUFF_64KB	0x3
+#define IXGBE_FCBUFF_MAX	65536	/* 64KB max */
+#define IXGBE_FCBUFF_MIN	4096	/* 4KB min */
+#define IXGBE_FCOE_DDP_MAX	512	/* 9 bits xid */
+
+/* Default traffic class to use for FCoE */
+#define IXGBE_FCOE_DEFTC	3
+
+/* fcerr */
+#define IXGBE_FCERR_BADCRC	0x00100000
+#define IXGBE_FCERR_EOFSOF	0x00200000
+#define IXGBE_FCERR_NOFIRST	0x00300000
+#define IXGBE_FCERR_OOOSEQ	0x00400000
+#define IXGBE_FCERR_NODMA	0x00500000
+#define IXGBE_FCERR_PKTLOST	0x00600000
+
+/* FCoE DDP for target mode */
+#define __IXGBE_FCOE_TARGET	1
+
+struct ixgbe_fcoe_ddp {
+	int len;
+	u32 err;
+	unsigned int sgc;
+	struct scatterlist *sgl;
+	dma_addr_t udp;
+	u64 *udl;
+	struct pci_pool *pool;
+};
+
+struct ixgbe_fcoe {
+	struct pci_pool **pool;
+	atomic_t refcnt;
+	spinlock_t lock;
+	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
+	unsigned char *extra_ddp_buffer;
+	dma_addr_t extra_ddp_buffer_dma;
+	u64 __percpu *pcpu_noddp;
+	u64 __percpu *pcpu_noddp_ext_buff;
+	unsigned long mode;
+	u8 tc;
+	u8 up;
+	u8 up_set;
+};
+#endif /* IXGBE_FCOE */
+
+#endif /* _IXGBE_FCOE_H */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
new file mode 100644
index 00000000..8c1d2fe3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
@@ -0,0 +1,2970 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+/******************************************************************************
+ Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
+******************************************************************************/
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#ifdef HAVE_SCTP
+#include <linux/sctp.h>
+#endif
+#include <linux/pkt_sched.h>
+#include <linux/ipv6.h>
+#ifdef NETIF_F_TSO
+#include <net/checksum.h>
+#ifdef NETIF_F_TSO6
+#include <net/ip6_checksum.h>
+#endif
+#endif
+#ifdef SIOCETHTOOL
+#include <linux/ethtool.h>
+#endif
+
+#include "ixgbe.h"
+
+#undef CONFIG_DCA
+#undef CONFIG_DCA_MODULE
+
+char ixgbe_driver_name[] = "ixgbe";
+static const char ixgbe_driver_string[] =
+			      "Intel(R) 10 Gigabit PCI Express Network Driver";
+#define DRV_HW_PERF
+
+#ifndef CONFIG_IXGBE_NAPI
+#define DRIVERNAPI
+#else
+#define DRIVERNAPI "-NAPI"
+#endif
+
+#define FPGA
+
+#define VMDQ_TAG
+
+#define MAJ 3
+#define MIN 9
+#define BUILD 17
+#define DRV_VERSION	__stringify(MAJ) "." __stringify(MIN) "." \
+			__stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG
+const char ixgbe_driver_version[] = DRV_VERSION;
+static const char ixgbe_copyright[] =
+				"Copyright (c) 1999-2012 Intel Corporation.";
+
+/* ixgbe_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = {
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP)},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)},
+	/* required last entry */
+	{0, }
+};
+
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+static int ixgbe_notify_dca(struct notifier_block *, unsigned long event,
+			    void *p);
+static struct notifier_block dca_notifier = {
+	.notifier_call	= ixgbe_notify_dca,
+	.next		= NULL,
+	.priority	= 0
+};
+
+#endif
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+
+
+static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter)
+{
+	u32 ctrl_ext;
+
+	/* Let firmware take over control of h/w */
+	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
+			ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
+}
+
+#ifdef NO_VNIC
+static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter)
+{
+	u32 ctrl_ext;
+
+	/* Let firmware know the driver has taken over */
+	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
+			ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
+}
+#endif
+
+
+static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+	int i;
+	u32 data;
+
+	if ((hw->fc.current_mode != ixgbe_fc_full) &&
+	    (hw->fc.current_mode != ixgbe_fc_rx_pause))
+		return;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
+		break;
+	default:
+		data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
+	}
+	hwstats->lxoffrxc += data;
+
+	/* refill credits (no tx hang) if we received xoff */
+	if (!data)
+		return;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		clear_bit(__IXGBE_HANG_CHECK_ARMED,
+			  &adapter->tx_ring[i]->state);
+}
+
+static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+	u32 xoff[8] = {0};
+	int i;
+	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+#ifdef HAVE_DCBNL_IEEE
+	if (adapter->ixgbe_ieee_pfc)
+		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+#endif
+	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) {
+		ixgbe_update_xoff_rx_lfc(adapter);
+		return;
+	}
+
+	/* update stats for each tc, only valid with PFC enabled */
+	for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
+			break;
+		default:
+			xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
+		}
+		hwstats->pxoffrxc[i] += xoff[i];
+	}
+
+	/* disarm tx queues that have received xoff frames */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+		u8 tc = tx_ring->dcb_tc;
+
+		if ((tc <= 7) && (xoff[tc]))
+			clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
+	}
+}
+
+
+
+
+#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
+
+
+
+
+#ifdef HAVE_8021P_SUPPORT
+/**
+ * ixgbe_vlan_stripping_disable - helper to disable vlan tag stripping
+ * @adapter: driver data
+ */
+void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl;
+	int i;
+
+	/* leave vlan tag stripping enabled for DCB */
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		return;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+		vlnctrl &= ~IXGBE_VLNCTRL_VME;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			u8 reg_idx = adapter->rx_ring[i]->reg_idx;
+			vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+			vlnctrl &= ~IXGBE_RXDCTL_VME;
+			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+#endif
+/**
+ * ixgbe_vlan_stripping_enable - helper to enable vlan tag stripping
+ * @adapter: driver data
+ */
+void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl;
+	int i;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+		vlnctrl |= IXGBE_VLNCTRL_VME;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			u8 reg_idx = adapter->rx_ring[i]->reg_idx;
+			vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+			vlnctrl |= IXGBE_RXDCTL_VME;
+			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+#ifdef HAVE_VLAN_RX_REGISTER
+void ixgbe_vlan_mode(struct net_device *netdev, struct vlan_group *grp)
+#else
+void ixgbe_vlan_mode(struct net_device *netdev, u32 features)
+#endif
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+#ifdef HAVE_8021P_SUPPORT
+	bool enable;
+#endif
+#ifdef HAVE_VLAN_RX_REGISTER
+
+	//if (!test_bit(__IXGBE_DOWN, &adapter->state))
+	//	ixgbe_irq_disable(adapter);
+
+	adapter->vlgrp = grp;
+
+	//if (!test_bit(__IXGBE_DOWN, &adapter->state))
+	//	ixgbe_irq_enable(adapter, true, true);
+#endif
+#ifdef HAVE_8021P_SUPPORT
+#ifdef HAVE_VLAN_RX_REGISTER
+	enable = (grp || (adapter->flags & IXGBE_FLAG_DCB_ENABLED));
+#else
+	enable = !!(features & NETIF_F_HW_VLAN_RX);
+#endif
+	if (enable)
+		/* enable VLAN tag insert/strip */
+		ixgbe_vlan_stripping_enable(adapter);
+	else
+		/* disable VLAN tag insert/strip */
+		ixgbe_vlan_stripping_disable(adapter);
+
+#endif
+}
+
+static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq)
+{
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+	struct netdev_hw_addr *mc_ptr;
+#else
+	struct dev_mc_list *mc_ptr;
+#endif
+	struct ixgbe_adapter *adapter = hw->back;
+	u8 *addr = *mc_addr_ptr;
+
+	*vmdq = adapter->num_vfs;
+
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+	mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]);
+	if (mc_ptr->list.next) {
+		struct netdev_hw_addr *ha;
+
+		ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list);
+		*mc_addr_ptr = ha->addr;
+	}
+#else
+	mc_ptr = container_of(addr, struct dev_mc_list, dmi_addr[0]);
+	if (mc_ptr->next)
+		*mc_addr_ptr = mc_ptr->next->dmi_addr;
+#endif
+	else
+		*mc_addr_ptr = NULL;
+
+	return addr;
+}
+
+/**
+ * ixgbe_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ *                0 on no addresses written
+ *                X on writing X addresses to MTA
+ **/
+int ixgbe_write_mc_addr_list(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+	struct netdev_hw_addr *ha;
+#endif
+	u8  *addr_list = NULL;
+	int addr_count = 0;
+
+	if (!hw->mac.ops.update_mc_addr_list)
+		return -ENOMEM;
+
+	if (!netif_running(netdev))
+		return 0;
+
+
+	hw->mac.ops.update_mc_addr_list(hw, NULL, 0,
+					ixgbe_addr_list_itr, true);
+
+	if (!netdev_mc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_MULTICAST
+		ha = list_first_entry(&netdev->mc.list,
+				      struct netdev_hw_addr, list);
+		addr_list = ha->addr;
+#else
+		addr_list = netdev->mc_list->dmi_addr;
+#endif
+		addr_count = netdev_mc_count(netdev);
+
+		hw->mac.ops.update_mc_addr_list(hw, addr_list, addr_count,
+						ixgbe_addr_list_itr, false);
+	}
+
+#ifdef CONFIG_PCI_IOV
+	//ixgbe_restore_vf_multicasts(adapter);
+#endif
+	return addr_count;
+}
+
+
+void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+	for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) {
+			hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr,
+						adapter->mac_table[i].queue,
+						IXGBE_RAH_AV);
+		} else {
+			hw->mac.ops.clear_rar(hw, i);
+		}
+	}
+}
+
+void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+	for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) {
+			if (adapter->mac_table[i].state &
+					IXGBE_MAC_STATE_IN_USE) {
+				hw->mac.ops.set_rar(hw, i,
+						adapter->mac_table[i].addr,
+						adapter->mac_table[i].queue,
+						IXGBE_RAH_AV);
+			} else {
+				hw->mac.ops.clear_rar(hw, i);
+			}
+			adapter->mac_table[i].state &=
+				~(IXGBE_MAC_STATE_MODIFIED);
+		}
+	}
+}
+
+int ixgbe_available_rars(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i, count = 0;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (adapter->mac_table[i].state == 0)
+			count++;
+	}
+	return count;
+}
+
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return 0;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE)
+			continue;
+		adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED |
+						IXGBE_MAC_STATE_IN_USE);
+		memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
+		adapter->mac_table[i].queue = queue;
+		ixgbe_sync_mac_table(adapter);
+		return i;
+	}
+	return -ENOMEM;
+}
+
+void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
+{
+	int i;
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
+		adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
+		memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+		adapter->mac_table[i].queue = 0;
+	}
+	ixgbe_sync_mac_table(adapter);
+}
+
+void ixgbe_del_mac_filter_by_index(struct ixgbe_adapter *adapter, int index)
+{
+	adapter->mac_table[index].state |= IXGBE_MAC_STATE_MODIFIED;
+	adapter->mac_table[index].state &= ~IXGBE_MAC_STATE_IN_USE;
+	memset(adapter->mac_table[index].addr, 0, ETH_ALEN);
+	adapter->mac_table[index].queue = 0;
+	ixgbe_sync_mac_table(adapter);
+}
+
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8* addr, u16 queue)
+{
+	/* search table for addr, if found, set to 0 and sync */
+	int i;
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (is_zero_ether_addr(addr))
+		return 0;
+	for (i = 0; i < hw->mac.num_rar_entries; i++) {
+		if (ether_addr_equal(addr, adapter->mac_table[i].addr) &&
+		    adapter->mac_table[i].queue == queue) {
+			adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED;
+			adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE;
+			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+			adapter->mac_table[i].queue = 0;
+			ixgbe_sync_mac_table(adapter);
+			return 0;
+		}
+	}
+	return -ENOMEM;
+}
+#ifdef HAVE_SET_RX_MODE
+/**
+ * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ *                0 on no addresses written
+ *                X on writing X addresses to the RAR table
+ **/
+int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
+			     struct net_device *netdev, unsigned int vfn)
+{
+	int count = 0;
+
+	/* return ENOMEM indicating insufficient memory for addresses */
+	if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter))
+		return -ENOMEM;
+
+	if (!netdev_uc_empty(netdev)) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+		struct netdev_hw_addr *ha;
+#else
+		struct dev_mc_list *ha;
+#endif
+		netdev_for_each_uc_addr(ha, netdev) {
+#ifdef NETDEV_HW_ADDR_T_UNICAST
+			ixgbe_del_mac_filter(adapter, ha->addr, (u16)vfn);
+			ixgbe_add_mac_filter(adapter, ha->addr, (u16)vfn);
+#else
+			ixgbe_del_mac_filter(adapter, ha->da_addr, (u16)vfn);
+			ixgbe_add_mac_filter(adapter, ha->da_addr, (u16)vfn);
+#endif
+			count++;
+		}
+	}
+	return count;
+}
+
+#endif
+/**
+ * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_method entry point is called whenever the unicast/multicast
+ * address list or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast and
+ * promiscuous mode.
+ **/
+void ixgbe_set_rx_mode(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
+	u32 vlnctrl;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
+	/* set all bits that we expect to always be set */
+	fctrl |= IXGBE_FCTRL_BAM;
+	fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
+	fctrl |= IXGBE_FCTRL_PMCF;
+
+	/* clear the bits we are changing the status of */
+	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+	vlnctrl  &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+
+	if (netdev->flags & IFF_PROMISC) {
+		hw->addr_ctrl.user_set_promisc = true;
+		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+		vmolr |= IXGBE_VMOLR_MPE;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI) {
+			fctrl |= IXGBE_FCTRL_MPE;
+			vmolr |= IXGBE_VMOLR_MPE;
+		} else {
+			/*
+			 * Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			count = ixgbe_write_mc_addr_list(netdev);
+			if (count < 0) {
+				fctrl |= IXGBE_FCTRL_MPE;
+				vmolr |= IXGBE_VMOLR_MPE;
+			} else if (count) {
+				vmolr |= IXGBE_VMOLR_ROMPE;
+			}
+		}
+#ifdef NETIF_F_HW_VLAN_TX
+		/* enable hardware vlan filtering */
+		vlnctrl |= IXGBE_VLNCTRL_VFE;
+#endif
+		hw->addr_ctrl.user_set_promisc = false;
+#ifdef HAVE_SET_RX_MODE
+		/*
+		 * Write addresses to available RAR registers, if there is not
+		 * sufficient space to store all the addresses then enable
+		 * unicast promiscuous mode
+		 */
+		count = ixgbe_write_uc_addr_list(adapter, netdev,
+						 adapter->num_vfs);
+		if (count < 0) {
+			fctrl |= IXGBE_FCTRL_UPE;
+			vmolr |= IXGBE_VMOLR_ROPE;
+		}
+#endif
+	}
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) &
+			 ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE |
+			   IXGBE_VMOLR_ROPE);
+		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr);
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+}
+
+
+
+
+
+
+
+
+/* Additional bittime to account for IXGBE framing */
+#define IXGBE_ETH_FRAMING 20
+
+/*
+ * ixgbe_hpbthresh - calculate high water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * @pb - packet buffer to calculate
+ */
+static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *dev = adapter->netdev;
+	int link, tc, kb, marker;
+	u32 dv_id, rx_pba;
+
+	/* Calculate max LAN frame size */
+	tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING;
+
+#ifdef IXGBE_FCOE
+	/* FCoE traffic class uses FCOE jumbo frames */
+	if (dev->features & NETIF_F_FCOE_MTU) {
+		int fcoe_pb = 0;
+
+		fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
+
+		if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
+			tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+	}
+#endif
+
+	/* Calculate delay value for device */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		dv_id = IXGBE_DV_X540(link, tc);
+		break;
+	default:
+		dv_id = IXGBE_DV(link, tc);
+		break;
+	}
+
+	/* Loopback switch introduces additional latency */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		dv_id += IXGBE_B2BT(tc);
+
+	/* Delay value is calculated in bit times convert to KB */
+	kb = IXGBE_BT2KB(dv_id);
+	rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
+
+	marker = rx_pba - kb;
+
+	/* It is possible that the packet buffer is not large enough
+	 * to provide required headroom. In this case throw an error
+	 * to user and a do the best we can.
+	 */
+	if (marker < 0) {
+		e_warn(drv, "Packet Buffer(%i) can not provide enough"
+			    "headroom to suppport flow control."
+			    "Decrease MTU or number of traffic classes\n", pb);
+		marker = tc + 1;
+	}
+
+	return marker;
+}
+
+/*
+ * ixgbe_lpbthresh - calculate low water mark for for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * @pb - packet buffer to calculate
+ */
+static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *dev = adapter->netdev;
+	int tc;
+	u32 dv_id;
+
+	/* Calculate max LAN frame size */
+	tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+#ifdef IXGBE_FCOE
+	/* FCoE traffic class uses FCOE jumbo frames */
+	if (dev->features & NETIF_F_FCOE_MTU) {
+		int fcoe_pb = 0;
+
+		fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
+
+		if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
+			tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+	}
+#endif
+
+	/* Calculate delay value for device */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		dv_id = IXGBE_LOW_DV_X540(tc);
+		break;
+	default:
+		dv_id = IXGBE_LOW_DV(tc);
+		break;
+	}
+
+	/* Delay value is calculated in bit times convert to KB */
+	return IXGBE_BT2KB(dv_id);
+}
+
+/*
+ * ixgbe_pbthresh_setup - calculate and setup high low water marks
+ */
+static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int num_tc = netdev_get_num_tc(adapter->netdev);
+	int i;
+
+	if (!num_tc)
+		num_tc = 1;
+	if (num_tc > IXGBE_DCB_MAX_TRAFFIC_CLASS)
+		num_tc = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+
+	for (i = 0; i < num_tc; i++) {
+		hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
+		hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);
+
+		/* Low water marks must not be larger than high water marks */
+		if (hw->fc.low_water[i] > hw->fc.high_water[i])
+			hw->fc.low_water[i] = 0;
+	}
+
+	for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++)
+		hw->fc.high_water[i] = 0;
+}
+
+
+
+#ifdef NO_VNIC
+static void ixgbe_configure(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	ixgbe_configure_pb(adapter);
+	ixgbe_configure_dcb(adapter);
+
+	ixgbe_set_rx_mode(adapter->netdev);
+#ifdef NETIF_F_HW_VLAN_TX
+	ixgbe_restore_vlan(adapter);
+#endif
+
+#ifdef IXGBE_FCOE
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+		ixgbe_configure_fcoe(adapter);
+
+#endif /* IXGBE_FCOE */
+
+	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
+		hw->mac.ops.disable_sec_rx_path(hw);
+
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+		ixgbe_init_fdir_signature_82599(&adapter->hw,
+						adapter->fdir_pballoc);
+	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+		ixgbe_init_fdir_perfect_82599(&adapter->hw,
+					      adapter->fdir_pballoc);
+		ixgbe_fdir_filter_restore(adapter);
+	}
+
+	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
+		hw->mac.ops.enable_sec_rx_path(hw);
+
+	ixgbe_configure_virtualization(adapter);
+
+	ixgbe_configure_tx(adapter);
+	ixgbe_configure_rx(adapter);
+}
+#endif
+
+static bool ixgbe_is_sfp(struct ixgbe_hw *hw)
+{
+	switch (hw->phy.type) {
+	case ixgbe_phy_sfp_avago:
+	case ixgbe_phy_sfp_ftl:
+	case ixgbe_phy_sfp_intel:
+	case ixgbe_phy_sfp_unknown:
+	case ixgbe_phy_sfp_passive_tyco:
+	case ixgbe_phy_sfp_passive_unknown:
+	case ixgbe_phy_sfp_active_unknown:
+	case ixgbe_phy_sfp_ftl_active:
+		return true;
+	case ixgbe_phy_nl:
+		if (hw->mac.type == ixgbe_mac_82598EB)
+			return true;
+	default:
+		return false;
+	}
+}
+
+
+/**
+ * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset
+ * @adapter: board private structure
+ *
+ * On a reset we need to clear out the VF stats or accounting gets
+ * messed up because they're not clear on read.
+ **/
+void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		adapter->vfinfo[i].last_vfstats.gprc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gprc +=
+			adapter->vfinfo[i].vfstats.gprc;
+		adapter->vfinfo[i].vfstats.gprc = 0;
+		adapter->vfinfo[i].last_vfstats.gptc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gptc +=
+			adapter->vfinfo[i].vfstats.gptc;
+		adapter->vfinfo[i].vfstats.gptc = 0;
+		adapter->vfinfo[i].last_vfstats.gorc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gorc +=
+			adapter->vfinfo[i].vfstats.gorc;
+		adapter->vfinfo[i].vfstats.gorc = 0;
+		adapter->vfinfo[i].last_vfstats.gotc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gotc +=
+			adapter->vfinfo[i].vfstats.gotc;
+		adapter->vfinfo[i].vfstats.gotc = 0;
+		adapter->vfinfo[i].last_vfstats.mprc =
+			IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i));
+		adapter->vfinfo[i].saved_rst_vfstats.mprc +=
+			adapter->vfinfo[i].vfstats.mprc;
+		adapter->vfinfo[i].vfstats.mprc = 0;
+	}
+}
+
+
+
+void ixgbe_reinit_locked(struct ixgbe_adapter *adapter)
+{
+#ifdef NO_VNIC
+	WARN_ON(in_interrupt());
+	/* put off any impending NetWatchDogTimeout */
+	adapter->netdev->trans_start = jiffies;
+
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	ixgbe_down(adapter);
+	/*
+	 * If SR-IOV enabled then wait a bit before bringing the adapter
+	 * back up to give the VFs time to respond to the reset.  The
+	 * two second wait is based upon the watchdog timer cycle in
+	 * the VF driver.
+	 */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		msleep(2000);
+	ixgbe_up(adapter);
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+#endif
+}
+
+void ixgbe_up(struct ixgbe_adapter *adapter)
+{
+	/* hardware has been reset, we need to reload some things */
+	//ixgbe_configure(adapter);
+
+	//ixgbe_up_complete(adapter);
+}
+
+void ixgbe_reset(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	/* lock SFP init bit to prevent race conditions with the watchdog */
+	while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+		usleep_range(1000, 2000);
+
+	/* clear all SFP and link config related flags while holding SFP_INIT */
+	adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP |
+			     IXGBE_FLAG2_SFP_NEEDS_RESET);
+	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
+
+	err = hw->mac.ops.init_hw(hw);
+	switch (err) {
+	case 0:
+	case IXGBE_ERR_SFP_NOT_PRESENT:
+	case IXGBE_ERR_SFP_NOT_SUPPORTED:
+		break;
+	case IXGBE_ERR_MASTER_REQUESTS_PENDING:
+		e_dev_err("master disable timed out\n");
+		break;
+	case IXGBE_ERR_EEPROM_VERSION:
+		/* We are running on a pre-production device, log a warning */
+		e_dev_warn("This device is a pre-production adapter/LOM. "
+			   "Please be aware there may be issues associated "
+			   "with your hardware.  If you are experiencing "
+			   "problems please contact your Intel or hardware "
+			   "representative who provided you with this "
+			   "hardware.\n");
+		break;
+	default:
+		e_dev_err("Hardware Error: %d\n", err);
+	}
+
+	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
+
+	ixgbe_flush_sw_mac_table(adapter);
+	memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr,
+	       netdev->addr_len);
+	adapter->mac_table[0].queue = adapter->num_vfs;
+	adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
+					IXGBE_MAC_STATE_IN_USE);
+	hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
+				adapter->mac_table[0].queue,
+				IXGBE_RAH_AV);
+}
+
+
+
+
+
+
+void ixgbe_down(struct ixgbe_adapter *adapter)
+{
+#ifdef NO_VNIC
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rxctrl;
+	int i;
+
+	/* signal that we are down to the interrupt handler */
+	set_bit(__IXGBE_DOWN, &adapter->state);
+
+	/* disable receives */
+	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
+
+	/* disable all enabled rx queues */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		/* this call also flushes the previous write */
+		ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]);
+
+	usleep_range(10000, 20000);
+
+	netif_tx_stop_all_queues(netdev);
+
+	/* call carrier off first to avoid false dev_watchdog timeouts */
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+
+	ixgbe_irq_disable(adapter);
+
+	ixgbe_napi_disable_all(adapter);
+
+	adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
+			     IXGBE_FLAG2_RESET_REQUESTED);
+	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
+
+	del_timer_sync(&adapter->service_timer);
+
+	if (adapter->num_vfs) {
+		/* Clear EITR Select mapping */
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0);
+
+		/* Mark all the VFs as inactive */
+		for (i = 0 ; i < adapter->num_vfs; i++)
+			adapter->vfinfo[i].clear_to_send = 0;
+
+		/* ping all the active vfs to let them know we are going down */
+		ixgbe_ping_all_vfs(adapter);
+
+		/* Disable all VFTE/VFRE TX/RX */
+		ixgbe_disable_tx_rx(adapter);
+	}
+
+	/* disable transmits in the hardware now that interrupts are off */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		u8 reg_idx = adapter->tx_ring[i]->reg_idx;
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+	}
+
+	/* Disable the Tx DMA engine on 82599 and X540 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
+				(IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
+				 ~IXGBE_DMATXCTL_TE));
+		break;
+	default:
+		break;
+	}
+
+#ifdef HAVE_PCI_ERS
+	if (!pci_channel_offline(adapter->pdev))
+#endif
+		ixgbe_reset(adapter);
+	/* power down the optics */
+	if ((hw->phy.multispeed_fiber) ||
+	    ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
+	     (hw->mac.type == ixgbe_mac_82599EB)))
+		ixgbe_disable_tx_laser(hw);
+
+	ixgbe_clean_all_tx_rings(adapter);
+	ixgbe_clean_all_rx_rings(adapter);
+
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+	/* since we reset the hardware DCA settings were cleared */
+	ixgbe_setup_dca(adapter);
+#endif
+
+#endif /* NO_VNIC */
+}
+
+#ifndef NO_VNIC
+
+#undef IXGBE_FCOE
+
+/* Artificial max queue cap per traffic class in DCB mode */
+#define DCB_QUEUE_CAP 8
+
+/**
+ * ixgbe_set_dcb_queues: Allocate queues for a DCB-enabled device
+ * @adapter: board private structure to initialize
+ *
+ * When DCB (Data Center Bridging) is enabled, allocate queues for
+ * each traffic class.  If multiqueue isn't available,then abort DCB
+ * initialization.
+ *
+ * This function handles all combinations of DCB, RSS, and FCoE.
+ *
+ **/
+static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
+{
+       int tcs;
+#ifdef HAVE_MQPRIO
+       int rss_i, i, offset = 0;
+       struct net_device *dev = adapter->netdev;
+
+       /* Map queue offset and counts onto allocated tx queues */
+       tcs = netdev_get_num_tc(dev);
+
+       if (!tcs)
+              return false;
+
+       rss_i = min_t(int, dev->num_tx_queues / tcs, num_online_cpus());
+
+       if (rss_i > DCB_QUEUE_CAP)
+              rss_i = DCB_QUEUE_CAP;
+
+       for (i = 0; i < tcs; i++) {
+              netdev_set_tc_queue(dev, i, rss_i, offset);
+              offset += rss_i;
+       }
+
+       adapter->num_tx_queues = rss_i * tcs;
+       adapter->num_rx_queues = rss_i * tcs;
+
+#ifdef IXGBE_FCOE
+       /* FCoE enabled queues require special configuration indexed
+        * by feature specific indices and mask. Here we map FCoE
+        * indices onto the DCB queue pairs allowing FCoE to own
+        * configuration later.
+        */
+
+       if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+              struct ixgbe_ring_feature *f;
+              int tc;
+              u8 prio_tc[IXGBE_DCB_MAX_USER_PRIORITY] = {0};
+
+              ixgbe_dcb_unpack_map_cee(&adapter->dcb_cfg,
+                                    IXGBE_DCB_TX_CONFIG,
+                                    prio_tc);
+              tc = prio_tc[adapter->fcoe.up];
+
+              f = &adapter->ring_feature[RING_F_FCOE];
+              f->indices = min_t(int, rss_i, f->indices);
+              f->mask = rss_i * tc;
+       }
+#endif /* IXGBE_FCOE */
+#else
+       if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+              return false;
+
+       /* Enable one Queue per traffic class */
+       tcs = adapter->tc;
+       if (!tcs)
+              return false;
+
+#ifdef IXGBE_FCOE
+       if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+              struct ixgbe_ring_feature *f;
+              int tc = netdev_get_prio_tc_map(adapter->netdev,
+                                          adapter->fcoe.up);
+
+              f = &adapter->ring_feature[RING_F_FCOE];
+
+              /*
+               * We have max 8 queues for FCoE, where 8 the is
+               * FCoE redirection table size.  We must also share
+               * ring resources with network traffic so if FCoE TC is
+               * 4 or greater and we are in 8 TC mode we can only use
+               * 7 queues.
+               */
+              if ((tcs > 4) && (tc >= 4) && (f->indices > 7))
+                     f->indices = 7;
+
+              f->indices = min_t(int, num_online_cpus(), f->indices);
+              f->mask = tcs;
+
+              adapter->num_rx_queues = f->indices + tcs;
+              adapter->num_tx_queues = f->indices + tcs;
+
+              return true;
+       }
+
+#endif /* IXGBE_FCOE */
+       adapter->num_rx_queues = tcs;
+       adapter->num_tx_queues = tcs;
+#endif /* HAVE_MQ */
+
+       return true;
+}
+
+/**
+ * ixgbe_set_vmdq_queues: Allocate queues for VMDq devices
+ * @adapter: board private structure to initialize
+ *
+ * When VMDq (Virtual Machine Devices queue) is enabled, allocate queues
+ * and VM pools where appropriate.  If RSS is available, then also try and
+ * enable RSS and map accordingly.
+ *
+ **/
+static bool ixgbe_set_vmdq_queues(struct ixgbe_adapter *adapter)
+{
+       int vmdq_i = adapter->ring_feature[RING_F_VMDQ].indices;
+       int vmdq_m = 0;
+       int rss_i = adapter->ring_feature[RING_F_RSS].indices;
+       unsigned long i;
+       int rss_shift;
+       bool ret = false;
+
+
+       switch (adapter->flags & (IXGBE_FLAG_RSS_ENABLED
+                               | IXGBE_FLAG_DCB_ENABLED
+                               | IXGBE_FLAG_VMDQ_ENABLED)) {
+
+       case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED):
+              switch (adapter->hw.mac.type) {
+              case ixgbe_mac_82599EB:
+              case ixgbe_mac_X540:
+                     vmdq_i = min((int)IXGBE_MAX_VMDQ_INDICES, vmdq_i);
+                     if (vmdq_i > 32)
+                            rss_i = 2;
+                     else
+                            rss_i = 4;
+                     i = rss_i;
+                     rss_shift = find_first_bit(&i, sizeof(i) * 8);
+                     vmdq_m = ((IXGBE_MAX_VMDQ_INDICES - 1) <<
+                               rss_shift) & (MAX_RX_QUEUES - 1);
+                     break;
+              default:
+                     break;
+              }
+              adapter->num_rx_queues = vmdq_i * rss_i;
+              adapter->num_tx_queues = min((int)MAX_TX_QUEUES, vmdq_i * rss_i);
+              ret = true;
+              break;
+
+       case (IXGBE_FLAG_VMDQ_ENABLED):
+              switch (adapter->hw.mac.type) {
+              case ixgbe_mac_82598EB:
+                     vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1);
+                     break;
+              case ixgbe_mac_82599EB:
+              case ixgbe_mac_X540:
+                     vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1) << 1;
+                     break;
+              default:
+                     break;
+              }
+              adapter->num_rx_queues = vmdq_i;
+              adapter->num_tx_queues = vmdq_i;
+              ret = true;
+              break;
+
+       default:
+              ret = false;
+              goto vmdq_queues_out;
+       }
+
+       if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+              adapter->num_rx_pools = vmdq_i;
+              adapter->num_rx_queues_per_pool = adapter->num_rx_queues /
+                                            vmdq_i;
+       } else {
+              adapter->num_rx_pools = adapter->num_rx_queues;
+              adapter->num_rx_queues_per_pool = 1;
+       }
+       /* save the mask for later use */
+       adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;
+vmdq_queues_out:
+       return ret;
+}
+
+/**
+ * ixgbe_set_rss_queues: Allocate queues for RSS
+ * @adapter: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
+{
+       struct ixgbe_ring_feature *f;
+
+       if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) {
+              adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+              return false;
+       }
+
+       /* set mask for 16 queue limit of RSS */
+       f = &adapter->ring_feature[RING_F_RSS];
+       f->mask = 0xF;
+
+       /*
+        * Use Flow Director in addition to RSS to ensure the best
+        * distribution of flows across cores, even when an FDIR flow
+        * isn't matched.
+        */
+       if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+              f = &adapter->ring_feature[RING_F_FDIR];
+
+              f->indices = min_t(int, num_online_cpus(), f->indices);
+              f->mask = 0;
+       }
+
+       adapter->num_rx_queues = f->indices;
+#ifdef HAVE_TX_MQ
+       adapter->num_tx_queues = f->indices;
+#endif
+
+       return true;
+}
+
+#ifdef IXGBE_FCOE
+/**
+ * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE)
+ * @adapter: board private structure to initialize
+ *
+ * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges.
+ * The ring feature mask is not used as a mask for FCoE, as it can take any 8
+ * rx queues out of the max number of rx queues, instead, it is used as the
+ * index of the first rx queue used by FCoE.
+ *
+ **/
+static bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
+{
+       struct ixgbe_ring_feature *f;
+
+       if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+              return false;
+
+       ixgbe_set_rss_queues(adapter);
+
+       f = &adapter->ring_feature[RING_F_FCOE];
+       f->indices = min_t(int, num_online_cpus(), f->indices);
+
+       /* adding FCoE queues */
+       f->mask = adapter->num_rx_queues;
+       adapter->num_rx_queues += f->indices;
+       adapter->num_tx_queues += f->indices;
+
+       return true;
+}
+
+#endif /* IXGBE_FCOE */
+/*
+ * ixgbe_set_num_queues: Allocate queues for device, feature dependent
+ * @adapter: board private structure to initialize
+ *
+ * This is the top level queue allocation routine.  The order here is very
+ * important, starting with the "most" number of features turned on at once,
+ * and ending with the smallest set of features.  This way large combinations
+ * can be allocated if they're turned on, and smaller combinations are the
+ * fallthrough conditions.
+ *
+ **/
+static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
+{
+       /* Start with base case */
+       adapter->num_rx_queues = 1;
+       adapter->num_tx_queues = 1;
+       adapter->num_rx_pools = adapter->num_rx_queues;
+       adapter->num_rx_queues_per_pool = 1;
+
+       if (ixgbe_set_vmdq_queues(adapter))
+              return;
+
+       if (ixgbe_set_dcb_queues(adapter))
+              return;
+
+#ifdef IXGBE_FCOE
+       if (ixgbe_set_fcoe_queues(adapter))
+              return;
+
+#endif /* IXGBE_FCOE */
+       ixgbe_set_rss_queues(adapter);
+}
+
+#endif
+
+
+/**
+ * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * ixgbe_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+
+	/* PCI config space info */
+
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	err = ixgbe_init_shared_code(hw);
+	if (err) {
+		e_err(probe, "init_shared_code failed: %d\n", err);
+		goto out;
+	}
+	adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
+				     hw->mac.num_rar_entries,
+				     GFP_ATOMIC);
+	/* Set capability flags */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		adapter->flags |= IXGBE_FLAG_MSI_CAPABLE |
+				  IXGBE_FLAG_MSIX_CAPABLE |
+				  IXGBE_FLAG_MQ_CAPABLE |
+				  IXGBE_FLAG_RSS_CAPABLE;
+		adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+		adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
+#endif
+		adapter->flags &= ~IXGBE_FLAG_SRIOV_CAPABLE;
+		adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE;
+
+		if (hw->device_id == IXGBE_DEV_ID_82598AT)
+			adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
+
+		adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82598;
+		break;
+	case ixgbe_mac_X540:
+		adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+	case ixgbe_mac_82599EB:
+		adapter->flags |= IXGBE_FLAG_MSI_CAPABLE |
+				  IXGBE_FLAG_MSIX_CAPABLE |
+				  IXGBE_FLAG_MQ_CAPABLE |
+				  IXGBE_FLAG_RSS_CAPABLE;
+		adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
+#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
+		adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
+#endif
+		adapter->flags |= IXGBE_FLAG_SRIOV_CAPABLE;
+		adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE;
+#ifdef IXGBE_FCOE
+		adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
+		adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+		adapter->ring_feature[RING_F_FCOE].indices = 0;
+#ifdef CONFIG_DCB
+		/* Default traffic class to use for FCoE */
+		adapter->fcoe.tc = IXGBE_FCOE_DEFTC;
+		adapter->fcoe.up = IXGBE_FCOE_DEFTC;
+		adapter->fcoe.up_set = IXGBE_FCOE_DEFTC;
+#endif
+#endif
+		if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM)
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+#ifndef IXGBE_NO_SMART_SPEED
+		hw->phy.smart_speed = ixgbe_smart_speed_on;
+#else
+		hw->phy.smart_speed = ixgbe_smart_speed_off;
+#endif
+		adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82599;
+	default:
+		break;
+	}
+
+	/* n-tuple support exists, always init our spinlock */
+	//spin_lock_init(&adapter->fdir_perfect_lock);
+
+	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) {
+		int j;
+		struct ixgbe_dcb_tc_config *tc;
+		int dcb_i = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+
+
+		adapter->dcb_cfg.num_tcs.pg_tcs = dcb_i;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = dcb_i;
+		for (j = 0; j < dcb_i; j++) {
+			tc = &adapter->dcb_cfg.tc_config[j];
+			tc->path[IXGBE_DCB_TX_CONFIG].bwg_id = 0;
+			tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / dcb_i;
+			tc->path[IXGBE_DCB_RX_CONFIG].bwg_id = 0;
+			tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / dcb_i;
+			tc->pfc = ixgbe_dcb_pfc_disabled;
+			if (j == 0) {
+				/* total of all TCs bandwidth needs to be 100 */
+				tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent +=
+								 100 % dcb_i;
+				tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent +=
+								 100 % dcb_i;
+			}
+		}
+
+		/* Initialize default user to priority mapping, UPx->TC0 */
+		tc = &adapter->dcb_cfg.tc_config[0];
+		tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF;
+		tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF;
+
+		adapter->dcb_cfg.bw_percentage[IXGBE_DCB_TX_CONFIG][0] = 100;
+		adapter->dcb_cfg.bw_percentage[IXGBE_DCB_RX_CONFIG][0] = 100;
+		adapter->dcb_cfg.rx_pba_cfg = ixgbe_dcb_pba_equal;
+		adapter->dcb_cfg.pfc_mode_enable = false;
+		adapter->dcb_cfg.round_robin_enable = false;
+		adapter->dcb_set_bitmap = 0x00;
+#ifdef CONFIG_DCB
+		adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE;
+#endif /* CONFIG_DCB */
+
+		if (hw->mac.type == ixgbe_mac_X540) {
+			adapter->dcb_cfg.num_tcs.pg_tcs = 4;
+			adapter->dcb_cfg.num_tcs.pfc_tcs = 4;
+		}
+	}
+#ifdef CONFIG_DCB
+	/* XXX does this need to be initialized even w/o DCB? */
+	//memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
+	//       sizeof(adapter->temp_dcb_cfg));
+
+#endif
+	//if (hw->mac.type == ixgbe_mac_82599EB ||
+	//    hw->mac.type == ixgbe_mac_X540)
+	//	hw->mbx.ops.init_params(hw);
+
+	/* default flow control settings */
+	hw->fc.requested_mode = ixgbe_fc_full;
+	hw->fc.current_mode = ixgbe_fc_full;	/* init for ethtool output */
+
+	adapter->last_lfc_mode = hw->fc.current_mode;
+	ixgbe_pbthresh_setup(adapter);
+	hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE;
+	hw->fc.send_xon = true;
+	hw->fc.disable_fc_autoneg = false;
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IXGBE_DEFAULT_TXD;
+	adapter->rx_ring_count = IXGBE_DEFAULT_RXD;
+
+	/* set default work limits */
+	adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
+	adapter->rx_work_limit = IXGBE_DEFAULT_RX_WORK;
+
+	set_bit(__IXGBE_DOWN, &adapter->state);
+out:
+	return err;
+}
+
+/**
+ * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring:    tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	//int orig_node = dev_to_node(dev);
+	int numa_node = -1;
+	int size;
+
+	size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
+
+	if (tx_ring->q_vector)
+		numa_node = tx_ring->q_vector->numa_node;
+
+	tx_ring->tx_buffer_info = vzalloc_node(size, numa_node);
+	if (!tx_ring->tx_buffer_info)
+		tx_ring->tx_buffer_info = vzalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	//set_dev_node(dev, numa_node);
+	//tx_ring->desc = dma_alloc_coherent(dev,
+	//				   tx_ring->size,
+	//				   &tx_ring->dma,
+	//				   GFP_KERNEL);
+	//set_dev_node(dev, orig_node);
+	//if (!tx_ring->desc)
+	//	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+	//					   &tx_ring->dma, GFP_KERNEL);
+	//if (!tx_ring->desc)
+	//	goto err;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * ixgbe_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
+		if (!err)
+			continue;
+		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	//int orig_node = dev_to_node(dev);
+	int numa_node = -1;
+	int size;
+
+	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
+
+	if (rx_ring->q_vector)
+		numa_node = rx_ring->q_vector->numa_node;
+
+	rx_ring->rx_buffer_info = vzalloc_node(size, numa_node);
+	if (!rx_ring->rx_buffer_info)
+		rx_ring->rx_buffer_info = vzalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+#ifdef NO_VNIC
+	set_dev_node(dev, numa_node);
+	rx_ring->desc = dma_alloc_coherent(dev,
+					   rx_ring->size,
+					   &rx_ring->dma,
+					   GFP_KERNEL);
+	set_dev_node(dev, orig_node);
+	if (!rx_ring->desc)
+		rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+						   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc)
+		goto err;
+
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+	ixgbe_init_rx_page_offset(rx_ring);
+
+#endif
+
+#endif /* NO_VNIC */
+	return 0;
+err:
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * ixgbe_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
+		if (!err)
+			continue;
+		e_err(probe, "Allocation for Rx Queue %u failed\n", i);
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * ixgbe_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring)
+{
+	//ixgbe_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	//dma_free_coherent(tx_ring->dev, tx_ring->size,
+	//		  tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i]->desc)
+			ixgbe_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ * ixgbe_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
+{
+	//ixgbe_clean_rx_ring(rx_ring);
+
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	//dma_free_coherent(rx_ring->dev, rx_ring->size,
+	//		  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i]->desc)
+			ixgbe_free_rx_resources(adapter->rx_ring[i]);
+}
+
+
+/**
+ * ixgbe_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+//static
+int ixgbe_open(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int err;
+
+	/* disallow open during test */
+	if (test_bit(__IXGBE_TESTING, &adapter->state))
+		return -EBUSY;
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = ixgbe_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = ixgbe_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+#ifdef NO_VNIC
+	ixgbe_configure(adapter);
+
+	err = ixgbe_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	ixgbe_up_complete(adapter);
+
+err_req_irq:
+#else
+	return 0;
+#endif
+err_setup_rx:
+	ixgbe_free_all_rx_resources(adapter);
+err_setup_tx:
+	ixgbe_free_all_tx_resources(adapter);
+	ixgbe_reset(adapter);
+
+	return err;
+}
+
+/**
+ * ixgbe_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+//static
+int ixgbe_close(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	//ixgbe_down(adapter);
+	//ixgbe_free_irq(adapter);
+
+	//ixgbe_fdir_filter_exit(adapter);
+
+	//ixgbe_free_all_tx_resources(adapter);
+	//ixgbe_free_all_rx_resources(adapter);
+
+	ixgbe_release_hw_control(adapter);
+
+	return 0;
+}
+
+
+
+
+
+/**
+ * ixgbe_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the timer callback.
+ **/
+//static
+struct net_device_stats *ixgbe_get_stats(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	/* update the stats data */
+	ixgbe_update_stats(adapter);
+
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+	/* only return the current stats */
+	return &netdev->stats;
+#else
+	/* only return the current stats */
+	return &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+}
+
+/**
+ * ixgbe_update_stats - Update the board statistics counters.
+ * @adapter: board private structure
+ **/
+void ixgbe_update_stats(struct ixgbe_adapter *adapter)
+{
+#ifdef HAVE_NETDEV_STATS_IN_NETDEV
+	struct net_device_stats *net_stats = &adapter->netdev->stats;
+#else
+	struct net_device_stats *net_stats = &adapter->net_stats;
+#endif /* HAVE_NETDEV_STATS_IN_NETDEV */
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+	u64 total_mpc = 0;
+	u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
+	u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
+	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+	u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
+#ifndef IXGBE_NO_LRO
+	u32 flushed = 0, coal = 0;
+	int num_q_vectors = 1;
+#endif
+#ifdef IXGBE_FCOE
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	unsigned int cpu;
+	u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0;
+#endif /* IXGBE_FCOE */
+
+	printk(KERN_DEBUG "ixgbe_update_stats, tx_queues=%d, rx_queues=%d\n",
+			adapter->num_tx_queues, adapter->num_rx_queues);
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return;
+
+#ifndef IXGBE_NO_LRO
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+		num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+#endif
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+		u64 rsc_count = 0;
+		u64 rsc_flush = 0;
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count;
+			rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush;
+		}
+		adapter->rsc_total_count = rsc_count;
+		adapter->rsc_total_flush = rsc_flush;
+	}
+
+#ifndef IXGBE_NO_LRO
+	for (i = 0; i < num_q_vectors; i++) {
+		struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
+		if (!q_vector)
+			continue;
+		flushed += q_vector->lrolist.stats.flushed;
+		coal += q_vector->lrolist.stats.coal;
+	}
+	adapter->lro_stats.flushed = flushed;
+	adapter->lro_stats.coal = coal;
+
+#endif
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
+		non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+		alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+		alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+		hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+		bytes += rx_ring->stats.bytes;
+		packets += rx_ring->stats.packets;
+
+	}
+	adapter->non_eop_descs = non_eop_descs;
+	adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+	adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+	adapter->hw_csum_rx_error = hw_csum_rx_error;
+	net_stats->rx_bytes = bytes;
+	net_stats->rx_packets = packets;
+
+	bytes = 0;
+	packets = 0;
+	/* gather some stats to the adapter struct that are per queue */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+		restart_queue += tx_ring->tx_stats.restart_queue;
+		tx_busy += tx_ring->tx_stats.tx_busy;
+		bytes += tx_ring->stats.bytes;
+		packets += tx_ring->stats.packets;
+	}
+	adapter->restart_queue = restart_queue;
+	adapter->tx_busy = tx_busy;
+	net_stats->tx_bytes = bytes;
+	net_stats->tx_packets = packets;
+
+	hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
+
+	/* 8 register reads */
+	for (i = 0; i < 8; i++) {
+		/* for packet buffers not used, the register should read 0 */
+		mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i));
+		missed_rx += mpc;
+		hwstats->mpc[i] += mpc;
+		total_mpc += hwstats->mpc[i];
+		hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
+		hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+			hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+			hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+			hwstats->pxonrxc[i] +=
+				IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
+			break;
+		case ixgbe_mac_82599EB:
+		case ixgbe_mac_X540:
+			hwstats->pxonrxc[i] +=
+				IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
+			break;
+		default:
+			break;
+		}
+	}
+
+	/*16 register reads */
+	for (i = 0; i < 16; i++) {
+		hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+		hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+		if ((hw->mac.type == ixgbe_mac_82599EB) ||
+		    (hw->mac.type == ixgbe_mac_X540)) {
+			hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */
+			hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */
+		}
+	}
+
+	hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
+	/* work around hardware counting issue */
+	hwstats->gprc -= missed_rx;
+
+	ixgbe_update_xoff_received(adapter);
+
+	/* 82598 hardware only has a 32 bit counter in the high register */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
+		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
+		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
+		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
+		break;
+	case ixgbe_mac_X540:
+		/* OS2BMC stats are X540 only*/
+		hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
+		hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
+		hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC);
+		hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC);
+	case ixgbe_mac_82599EB:
+		for (i = 0; i < 16; i++)
+			adapter->hw_rx_no_dma_resources +=
+					     IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
+		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
+		IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */
+		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
+		IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */
+		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL);
+		IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */
+		hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
+#ifdef HAVE_TX_MQ
+		hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+		hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+#endif /* HAVE_TX_MQ */
+#ifdef IXGBE_FCOE
+		hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
+		hwstats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
+		hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
+		hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
+		hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
+		hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
+		hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
+		/* Add up per cpu counters for total ddp aloc fail */
+		if (fcoe && fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) {
+			for_each_possible_cpu(cpu) {
+				fcoe_noddp_counts_sum +=
+					*per_cpu_ptr(fcoe->pcpu_noddp, cpu);
+				fcoe_noddp_ext_buff_counts_sum +=
+					*per_cpu_ptr(fcoe->
+						pcpu_noddp_ext_buff, cpu);
+			}
+		}
+		hwstats->fcoe_noddp = fcoe_noddp_counts_sum;
+		hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum;
+
+#endif /* IXGBE_FCOE */
+		break;
+	default:
+		break;
+	}
+	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
+	hwstats->bprc += bprc;
+	hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		hwstats->mprc -= bprc;
+	hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
+	hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
+	hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
+	hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
+	hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
+	hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
+	hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
+	hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
+	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
+	hwstats->lxontxc += lxon;
+	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
+	hwstats->lxofftxc += lxoff;
+	hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
+	hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
+	/*
+	 * 82598 errata - tx of flow control packets is included in tx counters
+	 */
+	xon_off_tot = lxon + lxoff;
+	hwstats->gptc -= xon_off_tot;
+	hwstats->mptc -= xon_off_tot;
+	hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN));
+	hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
+	hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
+	hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
+	hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
+	hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
+	hwstats->ptc64 -= xon_off_tot;
+	hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
+	hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
+	hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
+	hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
+	hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
+	hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
+	/* Fill out the OS statistics structure */
+	net_stats->multicast = hwstats->mprc;
+
+	/* Rx Errors */
+	net_stats->rx_errors = hwstats->crcerrs +
+				       hwstats->rlec;
+	net_stats->rx_dropped = 0;
+	net_stats->rx_length_errors = hwstats->rlec;
+	net_stats->rx_crc_errors = hwstats->crcerrs;
+	net_stats->rx_missed_errors = total_mpc;
+
+	/*
+	 * VF Stats Collection - skip while resetting because these
+	 * are not clear on read and otherwise you'll sometimes get
+	 * crazy values.
+	 */
+	if (!test_bit(__IXGBE_RESETTING, &adapter->state)) {
+		for (i = 0; i < adapter->num_vfs; i++) {
+			UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i),	      \
+					adapter->vfinfo[i].last_vfstats.gprc, \
+					adapter->vfinfo[i].vfstats.gprc);
+			UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i),	      \
+					adapter->vfinfo[i].last_vfstats.gptc, \
+					adapter->vfinfo[i].vfstats.gptc);
+			UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i),	      \
+					IXGBE_PVFGORC_MSB(i),		      \
+					adapter->vfinfo[i].last_vfstats.gorc, \
+					adapter->vfinfo[i].vfstats.gorc);
+			UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i),	      \
+					IXGBE_PVFGOTC_MSB(i),		      \
+					adapter->vfinfo[i].last_vfstats.gotc, \
+					adapter->vfinfo[i].vfstats.gotc);
+			UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i),	      \
+					adapter->vfinfo[i].last_vfstats.mprc, \
+					adapter->vfinfo[i].vfstats.mprc);
+		}
+	}
+}
+
+
+#ifdef NO_VNIC
+
+/**
+ * ixgbe_watchdog_update_link - update the link status
+ * @adapter - pointer to the device adapter structure
+ * @link_speed - pointer to a u32 to store the link_speed
+ **/
+static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 link_speed = adapter->link_speed;
+	bool link_up = adapter->link_up;
+	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+	if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE))
+		return;
+
+	if (hw->mac.ops.check_link) {
+		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+	} else {
+		/* always assume link is up, if no check link function */
+		link_speed = IXGBE_LINK_SPEED_10GB_FULL;
+		link_up = true;
+	}
+
+#ifdef HAVE_DCBNL_IEEE
+	if (adapter->ixgbe_ieee_pfc)
+		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+#endif
+	if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) {
+		hw->mac.ops.fc_enable(hw);
+		//ixgbe_set_rx_drop_en(adapter);
+	}
+
+	if (link_up ||
+	    time_after(jiffies, (adapter->link_check_timeout +
+				 IXGBE_TRY_LINK_TIMEOUT))) {
+		adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	adapter->link_up = link_up;
+	adapter->link_speed = link_speed;
+}
+#endif
+
+
+
+#ifdef NO_VNIC
+
+/**
+ * ixgbe_service_task - manages and runs subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void ixgbe_service_task(struct work_struct *work)
+{
+	//struct ixgbe_adapter *adapter = container_of(work,
+	//					     struct ixgbe_adapter,
+	//					     service_task);
+
+	//ixgbe_reset_subtask(adapter);
+	//ixgbe_sfp_detection_subtask(adapter);
+	//ixgbe_sfp_link_config_subtask(adapter);
+	//ixgbe_check_overtemp_subtask(adapter);
+	//ixgbe_watchdog_subtask(adapter);
+#ifdef HAVE_TX_MQ
+	//ixgbe_fdir_reinit_subtask(adapter);
+#endif
+	//ixgbe_check_hang_subtask(adapter);
+
+	//ixgbe_service_event_complete(adapter);
+}
+
+
+
+
+#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
+		       IXGBE_TXD_CMD_RS)
+
+
+/**
+ * ixgbe_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbe_set_mac(struct net_device *netdev, void *p)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+	int ret;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	ixgbe_del_mac_filter(adapter, hw->mac.addr,
+			     adapter->num_vfs);
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+
+	/* set the correct pool for the new PF MAC address in entry 0 */
+	ret = ixgbe_add_mac_filter(adapter, hw->mac.addr,
+				    adapter->num_vfs);
+	return ret > 0 ? 0 : ret;
+}
+
+
+/**
+ * ixgbe_ioctl -
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ **/
+static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+#ifdef ETHTOOL_OPS_COMPAT
+	case SIOCETHTOOL:
+		return ethtool_ioctl(ifr);
+#endif
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+#endif /* NO_VNIC */
+
+
+void ixgbe_do_reset(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+	else
+		ixgbe_reset(adapter);
+}
+
+
+
+
+
+
+/**
+ * ixgbe_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ixgbe_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ixgbe_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+//static
+int ixgbe_kni_probe(struct pci_dev *pdev,
+				 struct net_device **lad_dev)
+{
+	size_t count;
+	struct net_device *netdev;
+	struct ixgbe_adapter *adapter = NULL;
+	struct ixgbe_hw *hw = NULL;
+	static int cards_found;
+	int i, err;
+	u16 offset;
+	u16 eeprom_verh, eeprom_verl, eeprom_cfg_blkh, eeprom_cfg_blkl;
+	u32 etrack_id;
+	u16 build, major, patch;
+	char *info_string, *i_s_var;
+	u8 part_str[IXGBE_PBANUM_LENGTH];
+	enum ixgbe_mac_type mac_type = ixgbe_mac_unknown;
+#ifdef HAVE_TX_MQ
+	unsigned int indices = num_possible_cpus();
+#endif /* HAVE_TX_MQ */
+#ifdef IXGBE_FCOE
+	u16 device_caps;
+#endif
+	u16 wol_cap;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+
+#ifdef NO_VNIC
+	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
+					   IORESOURCE_MEM), ixgbe_driver_name);
+	if (err) {
+		dev_err(pci_dev_to_dev(pdev),
+			"pci_request_selected_regions failed 0x%x\n", err);
+		goto err_pci_reg;
+	}
+#endif
+
+	/*
+	 * The mac_type is needed before we have the adapter is  set up
+	 * so rather than maintain two devID -> MAC tables we dummy up
+	 * an ixgbe_hw stuct and use ixgbe_set_mac_type.
+	 */
+	hw = vmalloc(sizeof(struct ixgbe_hw));
+	if (!hw) {
+		pr_info("Unable to allocate memory for early mac "
+			"check\n");
+	} else {
+		hw->vendor_id = pdev->vendor;
+		hw->device_id = pdev->device;
+		ixgbe_set_mac_type(hw);
+		mac_type = hw->mac.type;
+		vfree(hw);
+	}
+
+#ifdef NO_VNIC
+	/*
+	 * Workaround of Silicon errata on 82598. Disable LOs in the PCI switch
+	 * port to which the 82598 is connected to prevent duplicate
+	 * completions caused by LOs.  We need the mac type so that we only
+	 * do this on 82598 devices, ixgbe_set_mac_type does this for us if
+	 * we set it's device ID.
+	 */
+	if (mac_type == ixgbe_mac_82598EB)
+		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+
+	pci_enable_pcie_error_reporting(pdev);
+
+	pci_set_master(pdev);
+#endif
+
+#ifdef HAVE_TX_MQ
+#ifdef CONFIG_DCB
+#ifdef HAVE_MQPRIO
+	indices *= IXGBE_DCB_MAX_TRAFFIC_CLASS;
+#else
+	indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES);
+#endif /* HAVE_MQPRIO */
+#endif /* CONFIG_DCB */
+
+	if (mac_type == ixgbe_mac_82598EB)
+		indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES);
+	else
+		indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES);
+
+#ifdef IXGBE_FCOE
+	indices += min_t(unsigned int, num_possible_cpus(),
+			 IXGBE_MAX_FCOE_INDICES);
+#endif
+	netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
+#else /* HAVE_TX_MQ */
+	netdev = alloc_etherdev(sizeof(struct ixgbe_adapter));
+#endif /* HAVE_TX_MQ */
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adapter = netdev_priv(netdev);
+	//pci_set_drvdata(pdev, adapter);
+
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+
+#ifdef HAVE_PCI_ERS
+	/*
+	 * call save state here in standalone driver because it relies on
+	 * adapter struct to exist, and needs to call netdev_priv
+	 */
+	pci_save_state(pdev);
+
+#endif
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+			      pci_resource_len(pdev, 0));
+	if (!hw->hw_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+	//ixgbe_assign_netdev_ops(netdev);
+	ixgbe_set_ethtool_ops(netdev);
+
+	strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
+
+	adapter->bd_number = cards_found;
+
+	/* setup the private structure */
+	err = ixgbe_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	/* Make it possible the adapter to be woken up via WOL */
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * check_options must be called before setup_link to set up
+	 * hw->fc completely
+	 */
+	//ixgbe_check_options(adapter);
+
+#ifndef NO_VNIC
+	/* reset_hw fills in the perm_addr as well */
+	hw->phy.reset_if_overtemp = true;
+	err = hw->mac.ops.reset_hw(hw);
+	hw->phy.reset_if_overtemp = false;
+	if (err == IXGBE_ERR_SFP_NOT_PRESENT &&
+	    hw->mac.type == ixgbe_mac_82598EB) {
+		err = 0;
+	} else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
+		e_dev_err("failed to load because an unsupported SFP+ "
+			  "module type was detected.\n");
+		e_dev_err("Reload the driver after installing a supported "
+			  "module.\n");
+		goto err_sw_init;
+	} else if (err) {
+		e_dev_err("HW Init failed: %d\n", err);
+		goto err_sw_init;
+	}
+#endif
+
+	//if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+	//	ixgbe_probe_vf(adapter);
+
+
+#ifdef MAX_SKB_FRAGS
+	netdev->features |= NETIF_F_SG |
+			    NETIF_F_IP_CSUM;
+
+#ifdef NETIF_F_IPV6_CSUM
+	netdev->features |= NETIF_F_IPV6_CSUM;
+#endif
+
+#ifdef NETIF_F_HW_VLAN_TX
+	netdev->features |= NETIF_F_HW_VLAN_TX |
+			    NETIF_F_HW_VLAN_RX;
+#endif
+#ifdef NETIF_F_TSO
+	netdev->features |= NETIF_F_TSO;
+#endif /* NETIF_F_TSO */
+#ifdef NETIF_F_TSO6
+	netdev->features |= NETIF_F_TSO6;
+#endif /* NETIF_F_TSO6 */
+#ifdef NETIF_F_RXHASH
+	netdev->features |= NETIF_F_RXHASH;
+#endif /* NETIF_F_RXHASH */
+
+#ifdef HAVE_NDO_SET_FEATURES
+	netdev->features |= NETIF_F_RXCSUM;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features;
+
+	/* give us the option of enabling RSC/LRO later */
+#ifdef IXGBE_NO_LRO
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
+#endif
+		netdev->hw_features |= NETIF_F_LRO;
+
+#else
+#ifdef NETIF_F_GRO
+
+	/* this is only needed on kernels prior to 2.6.39 */
+	netdev->features |= NETIF_F_GRO;
+#endif /* NETIF_F_GRO */
+#endif
+
+#ifdef NETIF_F_HW_VLAN_TX
+	/* set this bit last since it cannot be part of hw_features */
+	netdev->features |= NETIF_F_HW_VLAN_FILTER;
+#endif
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		netdev->features |= NETIF_F_SCTP_CSUM;
+#ifdef HAVE_NDO_SET_FEATURES
+		netdev->hw_features |= NETIF_F_SCTP_CSUM |
+				       NETIF_F_NTUPLE;
+#endif
+		break;
+	default:
+		break;
+	}
+
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+	netdev->vlan_features |= NETIF_F_SG |
+				 NETIF_F_IP_CSUM |
+				 NETIF_F_IPV6_CSUM |
+				 NETIF_F_TSO |
+				 NETIF_F_TSO6;
+
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+	/*
+	 * If perfect filters were enabled in check_options(), enable them
+	 * on the netdevice too.
+	 */
+	if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
+		netdev->features |= NETIF_F_NTUPLE;
+	if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
+		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+	if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+		/* clear n-tuple support in the netdev unconditionally */
+		netdev->features &= ~NETIF_F_NTUPLE;
+	}
+
+#ifdef NETIF_F_RXHASH
+	if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
+		netdev->features &= ~NETIF_F_RXHASH;
+
+#endif /* NETIF_F_RXHASH */
+	if (netdev->features & NETIF_F_LRO) {
+		if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
+		    ((adapter->rx_itr_setting == 1) ||
+		     (adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR))) {
+			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+		} else if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
+#ifdef IXGBE_NO_LRO
+			e_info(probe, "InterruptThrottleRate set too high, "
+			       "disabling RSC\n");
+#else
+			e_info(probe, "InterruptThrottleRate set too high, "
+			       "falling back to software LRO\n");
+#endif
+		}
+	}
+#ifdef CONFIG_DCB
+	//netdev->dcbnl_ops = &dcbnl_ops;
+#endif
+
+#ifdef IXGBE_FCOE
+#ifdef NETIF_F_FSO
+	if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
+		ixgbe_get_device_caps(hw, &device_caps);
+		if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) {
+			adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+			adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
+			e_info(probe, "FCoE offload feature is not available. "
+			       "Disabling FCoE offload feature\n");
+		}
+#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
+		else {
+			adapter->flags |= IXGBE_FLAG_FCOE_ENABLED;
+			adapter->ring_feature[RING_F_FCOE].indices =
+				IXGBE_FCRETA_SIZE;
+			netdev->features |= NETIF_F_FSO |
+					    NETIF_F_FCOE_CRC |
+					    NETIF_F_FCOE_MTU;
+			netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1;
+		}
+#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */
+#ifdef HAVE_NETDEV_VLAN_FEATURES
+		netdev->vlan_features |= NETIF_F_FSO |
+					 NETIF_F_FCOE_CRC |
+					 NETIF_F_FCOE_MTU;
+#endif /* HAVE_NETDEV_VLAN_FEATURES */
+	}
+#endif /* NETIF_F_FSO */
+#endif /* IXGBE_FCOE */
+
+#endif /* MAX_SKB_FRAGS */
+	/* make sure the EEPROM is good */
+	if (hw->eeprom.ops.validate_checksum &&
+	    (hw->eeprom.ops.validate_checksum(hw, NULL) < 0)) {
+		e_dev_err("The EEPROM Checksum Is Not Valid\n");
+		err = -EIO;
+		goto err_sw_init;
+	}
+
+	memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len);
+#ifdef ETHTOOL_GPERMADDR
+	memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len);
+
+	if (ixgbe_validate_mac_addr(netdev->perm_addr)) {
+		e_dev_err("invalid MAC address\n");
+		err = -EIO;
+		goto err_sw_init;
+	}
+#else
+	if (ixgbe_validate_mac_addr(netdev->dev_addr)) {
+		e_dev_err("invalid MAC address\n");
+		err = -EIO;
+		goto err_sw_init;
+	}
+#endif
+	memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr,
+	       netdev->addr_len);
+	adapter->mac_table[0].queue = adapter->num_vfs;
+	adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT |
+				       IXGBE_MAC_STATE_IN_USE);
+	hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr,
+			    adapter->mac_table[0].queue,
+			    IXGBE_RAH_AV);
+
+	//setup_timer(&adapter->service_timer, &ixgbe_service_timer,
+	//	    (unsigned long) adapter);
+
+	//INIT_WORK(&adapter->service_task, ixgbe_service_task);
+	//clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
+
+	//err = ixgbe_init_interrupt_scheme(adapter);
+	//if (err)
+	//	goto err_sw_init;
+
+	//adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+	ixgbe_set_num_queues(adapter);
+
+	adapter->wol = 0;
+	/* WOL not supported for all but the following */
+	switch (pdev->device) {
+	case IXGBE_DEV_ID_82599_SFP:
+		/* Only these subdevice supports WOL */
+		switch (pdev->subsystem_device) {
+		case IXGBE_SUBDEV_ID_82599_560FLR:
+			/* only support first port */
+			if (hw->bus.func != 0)
+				break;
+		case IXGBE_SUBDEV_ID_82599_SFP:
+			adapter->wol = IXGBE_WUFC_MAG;
+			break;
+		}
+		break;
+	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+		/* All except this subdevice support WOL */
+		if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
+			adapter->wol = IXGBE_WUFC_MAG;
+		break;
+	case IXGBE_DEV_ID_82599_KX4:
+		adapter->wol = IXGBE_WUFC_MAG;
+		break;
+	case IXGBE_DEV_ID_X540T:
+		/* Check eeprom to see if it is enabled */
+		ixgbe_read_eeprom(hw, 0x2c, &adapter->eeprom_cap);
+		wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
+
+		if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
+		    ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
+		     (hw->bus.func == 0)))
+			adapter->wol = IXGBE_WUFC_MAG;
+		break;
+	}
+	//device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+
+	/*
+	 * Save off EEPROM version number and Option Rom version which
+	 * together make a unique identify for the eeprom
+	 */
+	ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh);
+	ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl);
+
+	etrack_id = (eeprom_verh << 16) | eeprom_verl;
+
+	ixgbe_read_eeprom(hw, 0x17, &offset);
+
+	/* Make sure offset to SCSI block is valid */
+	if (!(offset == 0x0) && !(offset == 0xffff)) {
+		ixgbe_read_eeprom(hw, offset + 0x84, &eeprom_cfg_blkh);
+		ixgbe_read_eeprom(hw, offset + 0x83, &eeprom_cfg_blkl);
+
+		/* Only display Option Rom if exist */
+		if (eeprom_cfg_blkl && eeprom_cfg_blkh) {
+			major = eeprom_cfg_blkl >> 8;
+			build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8);
+			patch = eeprom_cfg_blkh & 0x00ff;
+
+			snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+				 "0x%08x, %d.%d.%d", etrack_id, major, build,
+				 patch);
+		} else {
+			snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+				 "0x%08x", etrack_id);
+		}
+	} else {
+		snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+			 "0x%08x", etrack_id);
+	}
+
+	/* reset the hardware with the new settings */
+	err = hw->mac.ops.start_hw(hw);
+	if (err == IXGBE_ERR_EEPROM_VERSION) {
+		/* We are running on a pre-production device, log a warning */
+		e_dev_warn("This device is a pre-production adapter/LOM. "
+			   "Please be aware there may be issues associated "
+			   "with your hardware.  If you are experiencing "
+			   "problems please contact your Intel or hardware "
+			   "representative who provided you with this "
+			   "hardware.\n");
+	}
+	/* pick up the PCI bus settings for reporting later */
+	if (hw->mac.ops.get_bus_info)
+		hw->mac.ops.get_bus_info(hw);
+
+	strlcpy(netdev->name, "eth%d", sizeof(netdev->name));
+	*lad_dev = netdev;
+
+	adapter->netdev_registered = true;
+#ifdef NO_VNIC
+	/* power down the optics */
+	if ((hw->phy.multispeed_fiber) ||
+	    ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
+	     (hw->mac.type == ixgbe_mac_82599EB)))
+		ixgbe_disable_tx_laser(hw);
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+	/* keep stopping all the transmit queues for older kernels */
+	netif_tx_stop_all_queues(netdev);
+#endif
+
+	/* print all messages at the end so that we use our eth%d name */
+	/* print bus type/speed/width info */
+	e_dev_info("(PCI Express:%s:%s) ",
+		   (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
+		   hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" :
+		   "Unknown"),
+		   (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
+		   hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
+		   hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" :
+		   "Unknown"));
+
+	/* print the MAC address */
+	for (i = 0; i < 6; i++)
+		pr_cont("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
+
+	/* First try to read PBA as a string */
+	err = ixgbe_read_pba_string(hw, part_str, IXGBE_PBANUM_LENGTH);
+	if (err)
+		strlcpy(part_str, "Unknown", sizeof(part_str));
+	if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
+		e_info(probe, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
+		       hw->mac.type, hw->phy.type, hw->phy.sfp_type, part_str);
+	else
+		e_info(probe, "MAC: %d, PHY: %d, PBA No: %s\n",
+		      hw->mac.type, hw->phy.type, part_str);
+
+	if (((hw->bus.speed == ixgbe_bus_speed_2500) &&
+	     (hw->bus.width <= ixgbe_bus_width_pcie_x4)) ||
+	    (hw->bus.width <= ixgbe_bus_width_pcie_x2)) {
+		e_dev_warn("PCI-Express bandwidth available for this "
+			   "card is not sufficient for optimal "
+			   "performance.\n");
+		e_dev_warn("For optimal performance a x8 PCI-Express "
+			   "slot is required.\n");
+	}
+
+#define INFO_STRING_LEN 255
+	info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL);
+	if (!info_string) {
+		e_err(probe, "allocation for info string failed\n");
+		goto no_info_string;
+	}
+	count = 0;
+	i_s_var = info_string;
+	count += snprintf(i_s_var, INFO_STRING_LEN, "Enabled Features: ");
+
+	i_s_var = info_string + count;
+	count += snprintf(i_s_var, (INFO_STRING_LEN - count),
+			"RxQ: %d TxQ: %d ", adapter->num_rx_queues,
+					adapter->num_tx_queues);
+	i_s_var = info_string + count;
+#ifdef IXGBE_FCOE
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count, "FCoE ");
+		i_s_var = info_string + count;
+	}
+#endif
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count,
+							"FdirHash ");
+		i_s_var = info_string + count;
+	}
+	if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count,
+						"FdirPerfect ");
+		i_s_var = info_string + count;
+	}
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCB ");
+		i_s_var = info_string + count;
+	}
+	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSS ");
+		i_s_var = info_string + count;
+	}
+	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCA ");
+		i_s_var = info_string + count;
+	}
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSC ");
+		i_s_var = info_string + count;
+	}
+#ifndef IXGBE_NO_LRO
+	else if (netdev->features & NETIF_F_LRO) {
+		count += snprintf(i_s_var, INFO_STRING_LEN - count, "LRO ");
+		i_s_var = info_string + count;
+	}
+#endif
+
+	BUG_ON(i_s_var > (info_string + INFO_STRING_LEN));
+	/* end features printing */
+	e_info(probe, "%s\n", info_string);
+	kfree(info_string);
+no_info_string:
+
+	/* firmware requires blank driver version */
+	ixgbe_set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF);
+
+#if defined(HAVE_NETDEV_STORAGE_ADDRESS) && defined(NETDEV_HW_ADDR_T_SAN)
+	/* add san mac addr to netdev */
+	//ixgbe_add_sanmac_netdev(netdev);
+
+#endif /* (HAVE_NETDEV_STORAGE_ADDRESS) && (NETDEV_HW_ADDR_T_SAN) */
+	e_info(probe, "Intel(R) 10 Gigabit Network Connection\n");
+	cards_found++;
+
+#ifdef IXGBE_SYSFS
+	//if (ixgbe_sysfs_init(adapter))
+	//	e_err(probe, "failed to allocate sysfs resources\n");
+#else
+#ifdef IXGBE_PROCFS
+	//if (ixgbe_procfs_init(adapter))
+	//	e_err(probe, "failed to allocate procfs resources\n");
+#endif /* IXGBE_PROCFS */
+#endif /* IXGBE_SYSFS */
+
+	return 0;
+
+//err_register:
+	//ixgbe_clear_interrupt_scheme(adapter);
+	//ixgbe_release_hw_control(adapter);
+err_sw_init:
+	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
+	if (adapter->mac_table)
+		kfree(adapter->mac_table);
+	iounmap(hw->hw_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	//pci_release_selected_regions(pdev,
+	//			     pci_select_bars(pdev, IORESOURCE_MEM));
+//err_pci_reg:
+//err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * ixgbe_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ixgbe_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+void ixgbe_kni_remove(struct pci_dev *pdev)
+{
+	pci_disable_device(pdev);
+}
+
+
+u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
+{
+	u16 value;
+	struct ixgbe_adapter *adapter = hw->back;
+
+	pci_read_config_word(adapter->pdev, reg, &value);
+	return value;
+}
+
+void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+
+	pci_write_config_word(adapter->pdev, reg, value);
+}
+
+void ewarn(struct ixgbe_hw *hw, const char *st, u32 status)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+
+	netif_warn(adapter, drv, adapter->netdev,  "%s", st);
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
new file mode 100644
index 00000000..124f00de
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
@@ -0,0 +1,105 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_MBX_H_
+#define _IXGBE_MBX_H_
+
+#include "ixgbe_type.h"
+
+#define IXGBE_VFMAILBOX_SIZE	16 /* 16 32 bit words - 64 bytes */
+#define IXGBE_ERR_MBX		-100
+
+#define IXGBE_VFMAILBOX		0x002FC
+#define IXGBE_VFMBMEM		0x00200
+
+/* Define mailbox register bits */
+#define IXGBE_VFMAILBOX_REQ	0x00000001 /* Request for PF Ready bit */
+#define IXGBE_VFMAILBOX_ACK	0x00000002 /* Ack PF message received */
+#define IXGBE_VFMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_VFMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_VFMAILBOX_PFSTS	0x00000010 /* PF wrote a message in the MB */
+#define IXGBE_VFMAILBOX_PFACK	0x00000020 /* PF ack the previous VF msg */
+#define IXGBE_VFMAILBOX_RSTI	0x00000040 /* PF has reset indication */
+#define IXGBE_VFMAILBOX_RSTD	0x00000080 /* PF has indicated reset done */
+#define IXGBE_VFMAILBOX_R2C_BITS	0x000000B0 /* All read to clear bits */
+
+#define IXGBE_PFMAILBOX_STS	0x00000001 /* Initiate message send to VF */
+#define IXGBE_PFMAILBOX_ACK	0x00000002 /* Ack message recv'd from VF */
+#define IXGBE_PFMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_RVFU	0x00000010 /* Reset VFU - used when VF stuck */
+
+#define IXGBE_MBVFICR_VFREQ_MASK	0x0000FFFF /* bits for VF messages */
+#define IXGBE_MBVFICR_VFREQ_VF1		0x00000001 /* bit for VF 1 message */
+#define IXGBE_MBVFICR_VFACK_MASK	0xFFFF0000 /* bits for VF acks */
+#define IXGBE_MBVFICR_VFACK_VF1		0x00010000 /* bit for VF 1 ack */
+
+
+/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is IXGBE_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+#define IXGBE_VT_MSGTYPE_ACK	0x80000000 /* Messages below or'd with
+					    * this are the ACK */
+#define IXGBE_VT_MSGTYPE_NACK	0x40000000 /* Messages below or'd with
+					    * this are the NACK */
+#define IXGBE_VT_MSGTYPE_CTS	0x20000000 /* Indicates that VF is still
+					    * clear to send requests */
+#define IXGBE_VT_MSGINFO_SHIFT	16
+/* bits 23:16 are used for extra info for certain messages */
+#define IXGBE_VT_MSGINFO_MASK	(0xFF << IXGBE_VT_MSGINFO_SHIFT)
+
+#define IXGBE_VF_RESET		0x01 /* VF requests reset */
+#define IXGBE_VF_SET_MAC_ADDR	0x02 /* VF requests PF to set MAC addr */
+#define IXGBE_VF_SET_MULTICAST	0x03 /* VF requests PF to set MC addr */
+#define IXGBE_VF_SET_VLAN	0x04 /* VF requests PF to set VLAN */
+#define IXGBE_VF_SET_LPE	0x05 /* VF requests PF to set VMOLR.LPE */
+#define IXGBE_VF_SET_MACVLAN	0x06 /* VF requests PF for unicast filter */
+
+/* length of permanent address message returned from PF */
+#define IXGBE_VF_PERMADDR_MSG_LEN	4
+/* word in permanent address message with the current multicast type */
+#define IXGBE_VF_MC_TYPE_WORD		3
+
+#define IXGBE_PF_CONTROL_MSG		0x0100 /* PF control message */
+
+
+#define IXGBE_VF_MBX_INIT_TIMEOUT	2000 /* number of retries on mailbox */
+#define IXGBE_VF_MBX_INIT_DELAY		500  /* microseconds between retries */
+
+s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_read_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_write_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16);
+s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16);
+s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16);
+void ixgbe_init_mbx_ops_generic(struct ixgbe_hw *hw);
+void ixgbe_init_mbx_params_vf(struct ixgbe_hw *);
+void ixgbe_init_mbx_params_pf(struct ixgbe_hw *);
+
+#endif /* _IXGBE_MBX_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
new file mode 100644
index 00000000..d161600b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
@@ -0,0 +1,132 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+/* glue for the OS independent part of ixgbe
+ * includes register access macros
+ */
+
+#ifndef _IXGBE_OSDEP_H_
+#define _IXGBE_OSDEP_H_
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/sched.h>
+#include "kcompat.h"
+
+
+#ifndef msleep
+#define msleep(x)	do { if (in_interrupt()) { \
+				/* Don't mdelay in interrupt context! */ \
+				BUG(); \
+			} else { \
+				msleep(x); \
+			} } while (0)
+
+#endif
+
+#undef ASSERT
+
+#ifdef DBG
+#define hw_dbg(hw, S, A...)	printk(KERN_DEBUG S, ## A)
+#else
+#define hw_dbg(hw, S, A...)	do {} while (0)
+#endif
+
+#define e_dev_info(format, arg...) \
+	dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_warn(format, arg...) \
+	dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_err(format, arg...) \
+	dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_dev_notice(format, arg...) \
+	dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg)
+#define e_info(msglvl, format, arg...) \
+	netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_err(msglvl, format, arg...) \
+	netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_warn(msglvl, format, arg...) \
+	netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_crit(msglvl, format, arg...) \
+	netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
+
+
+#ifdef DBG
+#define IXGBE_WRITE_REG(a, reg, value) do {\
+	switch (reg) { \
+	case IXGBE_EIMS: \
+	case IXGBE_EIMC: \
+	case IXGBE_EIAM: \
+	case IXGBE_EIAC: \
+	case IXGBE_EICR: \
+	case IXGBE_EICS: \
+		printk("%s: Reg - 0x%05X, value - 0x%08X\n", __func__, \
+		       reg, (u32)(value)); \
+	default: \
+		break; \
+	} \
+	writel((value), ((a)->hw_addr + (reg))); \
+} while (0)
+#else
+#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+#endif
+
+#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg))
+
+#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+	writel((value), ((a)->hw_addr + (reg) + ((offset) << 2))))
+
+#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \
+	readl((a)->hw_addr + (reg) + ((offset) << 2)))
+
+#ifndef writeq
+#define writeq(val, addr)	do { writel((u32) (val), addr); \
+				     writel((u32) (val >> 32), (addr + 4)); \
+				} while (0);
+#endif
+
+#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
+
+#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS)
+struct ixgbe_hw;
+extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg);
+extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value);
+extern void ewarn(struct ixgbe_hw *hw, const char *str, u32 status);
+
+#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word
+#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word
+#define IXGBE_EEPROM_GRANT_ATTEMPS 100
+#define IXGBE_HTONL(_i) htonl(_i)
+#define IXGBE_NTOHL(_i) ntohl(_i)
+#define IXGBE_NTOHS(_i) ntohs(_i)
+#define IXGBE_CPU_TO_LE32(_i) cpu_to_le32(_i)
+#define IXGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i)
+#define EWARN(H, W, S) ewarn(H, W, S)
+
+#endif /* _IXGBE_OSDEP_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
new file mode 100644
index 00000000..e3f5275e
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
@@ -0,0 +1,1847 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static void ixgbe_i2c_start(struct ixgbe_hw *hw);
+static void ixgbe_i2c_stop(struct ixgbe_hw *hw);
+static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data);
+static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data);
+static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw);
+static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data);
+static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data);
+static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
+static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
+static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data);
+static bool ixgbe_get_i2c_data(u32 *i2cctl);
+
+/**
+ *  ixgbe_init_phy_ops_generic - Inits PHY function ptrs
+ *  @hw: pointer to the hardware structure
+ *
+ *  Initialize the function pointers.
+ **/
+s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_phy_info *phy = &hw->phy;
+
+	/* PHY */
+	phy->ops.identify = &ixgbe_identify_phy_generic;
+	phy->ops.reset = &ixgbe_reset_phy_generic;
+	phy->ops.read_reg = &ixgbe_read_phy_reg_generic;
+	phy->ops.write_reg = &ixgbe_write_phy_reg_generic;
+	phy->ops.setup_link = &ixgbe_setup_phy_link_generic;
+	phy->ops.setup_link_speed = &ixgbe_setup_phy_link_speed_generic;
+	phy->ops.check_link = NULL;
+	phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_generic;
+	phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_generic;
+	phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_generic;
+	phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_generic;
+	phy->ops.write_i2c_eeprom = &ixgbe_write_i2c_eeprom_generic;
+	phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear;
+	phy->ops.identify_sfp = &ixgbe_identify_module_generic;
+	phy->sfp_type = ixgbe_sfp_type_unknown;
+	phy->ops.check_overtemp = &ixgbe_tn_check_overtemp;
+	return 0;
+}
+
+/**
+ *  ixgbe_identify_phy_generic - Get physical layer module
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines the physical layer module found on the current adapter.
+ **/
+s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+	u32 phy_addr;
+	u16 ext_ability = 0;
+
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
+			if (ixgbe_validate_phy_addr(hw, phy_addr)) {
+				hw->phy.addr = phy_addr;
+				ixgbe_get_phy_id(hw);
+				hw->phy.type =
+					ixgbe_get_phy_type_from_id(hw->phy.id);
+
+				if (hw->phy.type == ixgbe_phy_unknown) {
+					hw->phy.ops.read_reg(hw,
+						  IXGBE_MDIO_PHY_EXT_ABILITY,
+						  IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+						  &ext_ability);
+					if (ext_ability &
+					    (IXGBE_MDIO_PHY_10GBASET_ABILITY |
+					     IXGBE_MDIO_PHY_1000BASET_ABILITY))
+						hw->phy.type =
+							 ixgbe_phy_cu_unknown;
+					else
+						hw->phy.type =
+							 ixgbe_phy_generic;
+				}
+
+				status = 0;
+				break;
+			}
+		}
+		/* clear value if nothing found */
+		if (status != 0)
+			hw->phy.addr = 0;
+	} else {
+		status = 0;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_validate_phy_addr - Determines phy address is valid
+ *  @hw: pointer to hardware structure
+ *
+ **/
+bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr)
+{
+	u16 phy_id = 0;
+	bool valid = false;
+
+	hw->phy.addr = phy_addr;
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH,
+			     IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_id);
+
+	if (phy_id != 0xFFFF && phy_id != 0x0)
+		valid = true;
+
+	return valid;
+}
+
+/**
+ *  ixgbe_get_phy_id - Get the phy type
+ *  @hw: pointer to hardware structure
+ *
+ **/
+s32 ixgbe_get_phy_id(struct ixgbe_hw *hw)
+{
+	u32 status;
+	u16 phy_id_high = 0;
+	u16 phy_id_low = 0;
+
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH,
+				      IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+				      &phy_id_high);
+
+	if (status == 0) {
+		hw->phy.id = (u32)(phy_id_high << 16);
+		status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_LOW,
+					      IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+					      &phy_id_low);
+		hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK);
+		hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK);
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_get_phy_type_from_id - Get the phy type
+ *  @hw: pointer to hardware structure
+ *
+ **/
+enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
+{
+	enum ixgbe_phy_type phy_type;
+
+	switch (phy_id) {
+	case TN1010_PHY_ID:
+		phy_type = ixgbe_phy_tn;
+		break;
+	case X540_PHY_ID:
+		phy_type = ixgbe_phy_aq;
+		break;
+	case QT2022_PHY_ID:
+		phy_type = ixgbe_phy_qt;
+		break;
+	case ATH_PHY_ID:
+		phy_type = ixgbe_phy_nl;
+		break;
+	default:
+		phy_type = ixgbe_phy_unknown;
+		break;
+	}
+
+	hw_dbg(hw, "phy type found is %d\n", phy_type);
+	return phy_type;
+}
+
+/**
+ *  ixgbe_reset_phy_generic - Performs a PHY reset
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u16 ctrl = 0;
+	s32 status = 0;
+
+	if (hw->phy.type == ixgbe_phy_unknown)
+		status = ixgbe_identify_phy_generic(hw);
+
+	if (status != 0 || hw->phy.type == ixgbe_phy_none)
+		goto out;
+
+	/* Don't reset PHY if it's shut down due to overtemp. */
+	if (!hw->phy.reset_if_overtemp &&
+	    (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw)))
+		goto out;
+
+	/*
+	 * Perform soft PHY reset to the PHY_XS.
+	 * This will cause a soft reset to the PHY
+	 */
+	hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+			      IXGBE_MDIO_PHY_XS_DEV_TYPE,
+			      IXGBE_MDIO_PHY_XS_RESET);
+
+	/*
+	 * Poll for reset bit to self-clear indicating reset is complete.
+	 * Some PHYs could take up to 3 seconds to complete and need about
+	 * 1.7 usec delay after the reset is complete.
+	 */
+	for (i = 0; i < 30; i++) {
+		msleep(100);
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+				     IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl);
+		if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) {
+			udelay(2);
+			break;
+		}
+	}
+
+	if (ctrl & IXGBE_MDIO_PHY_XS_RESET) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "PHY reset polling failed to complete.\n");
+	}
+
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit address of PHY register to read
+ *  @phy_data: Pointer to read data from PHY register
+ **/
+s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+			       u32 device_type, u16 *phy_data)
+{
+	u32 command;
+	u32 i;
+	u32 data;
+	s32 status = 0;
+	u16 gssr;
+
+	if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+		gssr = IXGBE_GSSR_PHY1_SM;
+	else
+		gssr = IXGBE_GSSR_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	if (status == 0) {
+		/* Setup and write the address cycle command */
+		command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+			   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+			   (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+			   (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
+
+		IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+		/*
+		 * Check every 10 usec to see if the address cycle completed.
+		 * The MDI Command bit will clear when the operation is
+		 * complete
+		 */
+		for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+			udelay(10);
+
+			command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+			if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+				break;
+		}
+
+		if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+			hw_dbg(hw, "PHY address command did not complete.\n");
+			status = IXGBE_ERR_PHY;
+		}
+
+		if (status == 0) {
+			/*
+			 * Address cycle complete, setup and write the read
+			 * command
+			 */
+			command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+				   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+				   (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+				   (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND));
+
+			IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+			/*
+			 * Check every 10 usec to see if the address cycle
+			 * completed. The MDI Command bit will clear when the
+			 * operation is complete
+			 */
+			for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+				udelay(10);
+
+				command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+				if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+					break;
+			}
+
+			if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+				hw_dbg(hw, "PHY read command didn't complete\n");
+				status = IXGBE_ERR_PHY;
+			} else {
+				/*
+				 * Read operation is complete.  Get the data
+				 * from MSRWD
+				 */
+				data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+				data >>= IXGBE_MSRWD_READ_DATA_SHIFT;
+				*phy_data = (u16)(data);
+			}
+		}
+
+		hw->mac.ops.release_swfw_sync(hw, gssr);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_phy_reg_generic - Writes a value to specified PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 5 bit device type
+ *  @phy_data: Data to write to the PHY register
+ **/
+s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+				u32 device_type, u16 phy_data)
+{
+	u32 command;
+	u32 i;
+	s32 status = 0;
+	u16 gssr;
+
+	if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+		gssr = IXGBE_GSSR_PHY1_SM;
+	else
+		gssr = IXGBE_GSSR_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	if (status == 0) {
+		/* Put the data in the MDI single read and write data register*/
+		IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data);
+
+		/* Setup and write the address cycle command */
+		command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+			   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+			   (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+			   (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
+
+		IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+		/*
+		 * Check every 10 usec to see if the address cycle completed.
+		 * The MDI Command bit will clear when the operation is
+		 * complete
+		 */
+		for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+			udelay(10);
+
+			command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+			if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+				break;
+		}
+
+		if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+			hw_dbg(hw, "PHY address cmd didn't complete\n");
+			status = IXGBE_ERR_PHY;
+		}
+
+		if (status == 0) {
+			/*
+			 * Address cycle complete, setup and write the write
+			 * command
+			 */
+			command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+				   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+				   (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+				   (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND));
+
+			IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+			/*
+			 * Check every 10 usec to see if the address cycle
+			 * completed. The MDI Command bit will clear when the
+			 * operation is complete
+			 */
+			for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+				udelay(10);
+
+				command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+
+				if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+					break;
+			}
+
+			if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+				hw_dbg(hw, "PHY address cmd didn't complete\n");
+				status = IXGBE_ERR_PHY;
+			}
+		}
+
+		hw->mac.ops.release_swfw_sync(hw, gssr);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_setup_phy_link_generic - Set and restart autoneg
+ *  @hw: pointer to hardware structure
+ *
+ *  Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 time_out;
+	u32 max_time_out = 10;
+	u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
+	bool autoneg = false;
+	ixgbe_link_speed speed;
+
+	ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+		/* Set or unset auto-negotiation 10G advertisement */
+		hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+			autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
+
+		hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      autoneg_reg);
+	}
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+		/* Set or unset auto-negotiation 1G advertisement */
+		hw->phy.ops.read_reg(hw,
+				     IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
+			autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
+
+		hw->phy.ops.write_reg(hw,
+				      IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      autoneg_reg);
+	}
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL) {
+		/* Set or unset auto-negotiation 100M advertisement */
+		hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE |
+				 IXGBE_MII_100BASE_T_ADVERTISE_HALF);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
+			autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
+
+		hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      autoneg_reg);
+	}
+
+	/* Restart PHY autonegotiation and wait for completion */
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+			     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
+
+	autoneg_reg |= IXGBE_MII_RESTART;
+
+	hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+			      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
+
+	/* Wait for autonegotiation to finish */
+	for (time_out = 0; time_out < max_time_out; time_out++) {
+		udelay(10);
+		/* Restart PHY autonegotiation and wait for completion */
+		status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+					      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+					      &autoneg_reg);
+
+		autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
+		if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE)
+			break;
+	}
+
+	if (time_out == max_time_out) {
+		status = IXGBE_ERR_LINK_SETUP;
+		hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out");
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ **/
+s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+				       ixgbe_link_speed speed,
+				       bool autoneg,
+				       bool autoneg_wait_to_complete)
+{
+
+	/*
+	 * Clear autoneg_advertised and set new values based on input link
+	 * speed.
+	 */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+	/* Setup link based on the new speed settings */
+	hw->phy.ops.setup_link(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_copper_link_capabilities_generic - Determines link capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @autoneg: boolean auto-negotiation value
+ *
+ *  Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+					       ixgbe_link_speed *speed,
+					       bool *autoneg)
+{
+	s32 status = IXGBE_ERR_LINK_SETUP;
+	u16 speed_ability;
+
+	*speed = 0;
+	*autoneg = true;
+
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_SPEED_ABILITY,
+				      IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+				      &speed_ability);
+
+	if (status == 0) {
+		if (speed_ability & IXGBE_MDIO_PHY_SPEED_10G)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M)
+			*speed |= IXGBE_LINK_SPEED_100_FULL;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_check_phy_link_tnx - Determine link and speed status
+ *  @hw: pointer to hardware structure
+ *
+ *  Reads the VS1 register to determine if link is up and the current speed for
+ *  the PHY.
+ **/
+s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+			     bool *link_up)
+{
+	s32 status = 0;
+	u32 time_out;
+	u32 max_time_out = 10;
+	u16 phy_link = 0;
+	u16 phy_speed = 0;
+	u16 phy_data = 0;
+
+	/* Initialize speed and link to default case */
+	*link_up = false;
+	*speed = IXGBE_LINK_SPEED_10GB_FULL;
+
+	/*
+	 * Check current speed and link status of the PHY register.
+	 * This is a vendor specific register and may have to
+	 * be changed for other copper PHYs.
+	 */
+	for (time_out = 0; time_out < max_time_out; time_out++) {
+		udelay(10);
+		status = hw->phy.ops.read_reg(hw,
+					IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS,
+					IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+					&phy_data);
+		phy_link = phy_data & IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS;
+		phy_speed = phy_data &
+				 IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS;
+		if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) {
+			*link_up = true;
+			if (phy_speed ==
+			    IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS)
+				*speed = IXGBE_LINK_SPEED_1GB_FULL;
+			break;
+		}
+	}
+
+	return status;
+}
+
+/**
+ *	ixgbe_setup_phy_link_tnx - Set and restart autoneg
+ *	@hw: pointer to hardware structure
+ *
+ *	Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 time_out;
+	u32 max_time_out = 10;
+	u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
+	bool autoneg = false;
+	ixgbe_link_speed speed;
+
+	ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+		/* Set or unset auto-negotiation 10G advertisement */
+		hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+			autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE;
+
+		hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      autoneg_reg);
+	}
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+		/* Set or unset auto-negotiation 1G advertisement */
+		hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
+			autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
+
+		hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      autoneg_reg);
+	}
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL) {
+		/* Set or unset auto-negotiation 100M advertisement */
+		hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+				     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~IXGBE_MII_100BASE_T_ADVERTISE;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
+			autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE;
+
+		hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG,
+				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      autoneg_reg);
+	}
+
+	/* Restart PHY autonegotiation and wait for completion */
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+			     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
+
+	autoneg_reg |= IXGBE_MII_RESTART;
+
+	hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
+			      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
+
+	/* Wait for autonegotiation to finish */
+	for (time_out = 0; time_out < max_time_out; time_out++) {
+		udelay(10);
+		/* Restart PHY autonegotiation and wait for completion */
+		status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+					      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+					      &autoneg_reg);
+
+		autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE;
+		if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE)
+			break;
+	}
+
+	if (time_out == max_time_out) {
+		status = IXGBE_ERR_LINK_SETUP;
+		hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out");
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_get_phy_firmware_version_tnx - Gets the PHY Firmware Version
+ *  @hw: pointer to hardware structure
+ *  @firmware_version: pointer to the PHY Firmware Version
+ **/
+s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
+				       u16 *firmware_version)
+{
+	s32 status = 0;
+
+	status = hw->phy.ops.read_reg(hw, TNX_FW_REV,
+				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      firmware_version);
+
+	return status;
+}
+
+/**
+ *  ixgbe_get_phy_firmware_version_generic - Gets the PHY Firmware Version
+ *  @hw: pointer to hardware structure
+ *  @firmware_version: pointer to the PHY Firmware Version
+ **/
+s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
+					   u16 *firmware_version)
+{
+	s32 status = 0;
+
+	status = hw->phy.ops.read_reg(hw, AQ_FW_REV,
+				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      firmware_version);
+
+	return status;
+}
+
+/**
+ *  ixgbe_reset_phy_nl - Performs a PHY reset
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
+{
+	u16 phy_offset, control, eword, edata, block_crc;
+	bool end_data = false;
+	u16 list_offset, data_offset;
+	u16 phy_data = 0;
+	s32 ret_val = 0;
+	u32 i;
+
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+			     IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data);
+
+	/* reset the PHY and poll for completion */
+	hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+			      IXGBE_MDIO_PHY_XS_DEV_TYPE,
+			      (phy_data | IXGBE_MDIO_PHY_XS_RESET));
+
+	for (i = 0; i < 100; i++) {
+		hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL,
+				     IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data);
+		if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) == 0)
+			break;
+		msleep(10);
+	}
+
+	if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) != 0) {
+		hw_dbg(hw, "PHY reset did not complete.\n");
+		ret_val = IXGBE_ERR_PHY;
+		goto out;
+	}
+
+	/* Get init offsets */
+	ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
+						      &data_offset);
+	if (ret_val != 0)
+		goto out;
+
+	ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc);
+	data_offset++;
+	while (!end_data) {
+		/*
+		 * Read control word from PHY init contents offset
+		 */
+		ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
+		control = (eword & IXGBE_CONTROL_MASK_NL) >>
+			   IXGBE_CONTROL_SHIFT_NL;
+		edata = eword & IXGBE_DATA_MASK_NL;
+		switch (control) {
+		case IXGBE_DELAY_NL:
+			data_offset++;
+			hw_dbg(hw, "DELAY: %d MS\n", edata);
+			msleep(edata);
+			break;
+		case IXGBE_DATA_NL:
+			hw_dbg(hw, "DATA:\n");
+			data_offset++;
+			hw->eeprom.ops.read(hw, data_offset++,
+					    &phy_offset);
+			for (i = 0; i < edata; i++) {
+				hw->eeprom.ops.read(hw, data_offset, &eword);
+				hw->phy.ops.write_reg(hw, phy_offset,
+						      IXGBE_TWINAX_DEV, eword);
+				hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword,
+					  phy_offset);
+				data_offset++;
+				phy_offset++;
+			}
+			break;
+		case IXGBE_CONTROL_NL:
+			data_offset++;
+			hw_dbg(hw, "CONTROL:\n");
+			if (edata == IXGBE_CONTROL_EOL_NL) {
+				hw_dbg(hw, "EOL\n");
+				end_data = true;
+			} else if (edata == IXGBE_CONTROL_SOL_NL) {
+				hw_dbg(hw, "SOL\n");
+			} else {
+				hw_dbg(hw, "Bad control value\n");
+				ret_val = IXGBE_ERR_PHY;
+				goto out;
+			}
+			break;
+		default:
+			hw_dbg(hw, "Bad control type\n");
+			ret_val = IXGBE_ERR_PHY;
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_identify_module_generic - Identifies module type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines HW type and calls appropriate function.
+ **/
+s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_SFP_NOT_PRESENT;
+
+	switch (hw->mac.ops.get_media_type(hw)) {
+	case ixgbe_media_type_fiber:
+		status = ixgbe_identify_sfp_module_generic(hw);
+		break;
+
+	case ixgbe_media_type_fiber_qsfp:
+		status = ixgbe_identify_qsfp_module_generic(hw);
+		break;
+
+	default:
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		status = IXGBE_ERR_SFP_NOT_PRESENT;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_identify_sfp_module_generic - Identifies SFP modules
+ *  @hw: pointer to hardware structure
+ *
+ *  Searches for and identifies the SFP module and assigns appropriate PHY type.
+ **/
+s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+	u32 vendor_oui = 0;
+	enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
+	u8 identifier = 0;
+	u8 comp_codes_1g = 0;
+	u8 comp_codes_10g = 0;
+	u8 oui_bytes[3] = {0, 0, 0};
+	u8 cable_tech = 0;
+	u8 cable_spec = 0;
+	u16 enforce_sfp = 0;
+
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) {
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		status = IXGBE_ERR_SFP_NOT_PRESENT;
+		goto out;
+	}
+
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_IDENTIFIER,
+					     &identifier);
+
+	if (status == IXGBE_ERR_SWFW_SYNC ||
+	    status == IXGBE_ERR_I2C ||
+	    status == IXGBE_ERR_SFP_NOT_PRESENT)
+		goto err_read_i2c_eeprom;
+
+	/* LAN ID is needed for sfp_type determination */
+	hw->mac.ops.set_lan_id(hw);
+
+	if (identifier != IXGBE_SFF_IDENTIFIER_SFP) {
+		hw->phy.type = ixgbe_phy_sfp_unsupported;
+		status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+	} else {
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+						     IXGBE_SFF_1GBE_COMP_CODES,
+						     &comp_codes_1g);
+
+		if (status == IXGBE_ERR_SWFW_SYNC ||
+		    status == IXGBE_ERR_I2C ||
+		    status == IXGBE_ERR_SFP_NOT_PRESENT)
+			goto err_read_i2c_eeprom;
+
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+						     IXGBE_SFF_10GBE_COMP_CODES,
+						     &comp_codes_10g);
+
+		if (status == IXGBE_ERR_SWFW_SYNC ||
+		    status == IXGBE_ERR_I2C ||
+		    status == IXGBE_ERR_SFP_NOT_PRESENT)
+			goto err_read_i2c_eeprom;
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+						     IXGBE_SFF_CABLE_TECHNOLOGY,
+						     &cable_tech);
+
+		if (status == IXGBE_ERR_SWFW_SYNC ||
+		    status == IXGBE_ERR_I2C ||
+		    status == IXGBE_ERR_SFP_NOT_PRESENT)
+			goto err_read_i2c_eeprom;
+
+		 /* ID Module
+		  * =========
+		  * 0   SFP_DA_CU
+		  * 1   SFP_SR
+		  * 2   SFP_LR
+		  * 3   SFP_DA_CORE0 - 82599-specific
+		  * 4   SFP_DA_CORE1 - 82599-specific
+		  * 5   SFP_SR/LR_CORE0 - 82599-specific
+		  * 6   SFP_SR/LR_CORE1 - 82599-specific
+		  * 7   SFP_act_lmt_DA_CORE0 - 82599-specific
+		  * 8   SFP_act_lmt_DA_CORE1 - 82599-specific
+		  * 9   SFP_1g_cu_CORE0 - 82599-specific
+		  * 10  SFP_1g_cu_CORE1 - 82599-specific
+		  * 11  SFP_1g_sx_CORE0 - 82599-specific
+		  * 12  SFP_1g_sx_CORE1 - 82599-specific
+		  */
+		if (hw->mac.type == ixgbe_mac_82598EB) {
+			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+				hw->phy.sfp_type = ixgbe_sfp_type_da_cu;
+			else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
+				hw->phy.sfp_type = ixgbe_sfp_type_sr;
+			else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
+				hw->phy.sfp_type = ixgbe_sfp_type_lr;
+			else
+				hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+		} else if (hw->mac.type == ixgbe_mac_82599EB) {
+			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) {
+				if (hw->bus.lan_id == 0)
+					hw->phy.sfp_type =
+						     ixgbe_sfp_type_da_cu_core0;
+				else
+					hw->phy.sfp_type =
+						     ixgbe_sfp_type_da_cu_core1;
+			} else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) {
+				hw->phy.ops.read_i2c_eeprom(
+						hw, IXGBE_SFF_CABLE_SPEC_COMP,
+						&cable_spec);
+				if (cable_spec &
+				    IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) {
+					if (hw->bus.lan_id == 0)
+						hw->phy.sfp_type =
+						ixgbe_sfp_type_da_act_lmt_core0;
+					else
+						hw->phy.sfp_type =
+						ixgbe_sfp_type_da_act_lmt_core1;
+				} else {
+					hw->phy.sfp_type =
+							ixgbe_sfp_type_unknown;
+				}
+			} else if (comp_codes_10g &
+				   (IXGBE_SFF_10GBASESR_CAPABLE |
+				    IXGBE_SFF_10GBASELR_CAPABLE)) {
+				if (hw->bus.lan_id == 0)
+					hw->phy.sfp_type =
+						      ixgbe_sfp_type_srlr_core0;
+				else
+					hw->phy.sfp_type =
+						      ixgbe_sfp_type_srlr_core1;
+			} else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) {
+				if (hw->bus.lan_id == 0)
+					hw->phy.sfp_type =
+						ixgbe_sfp_type_1g_cu_core0;
+				else
+					hw->phy.sfp_type =
+						ixgbe_sfp_type_1g_cu_core1;
+			} else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) {
+				if (hw->bus.lan_id == 0)
+					hw->phy.sfp_type =
+						ixgbe_sfp_type_1g_sx_core0;
+				else
+					hw->phy.sfp_type =
+						ixgbe_sfp_type_1g_sx_core1;
+			} else {
+				hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+			}
+		}
+
+		if (hw->phy.sfp_type != stored_sfp_type)
+			hw->phy.sfp_setup_needed = true;
+
+		/* Determine if the SFP+ PHY is dual speed or not. */
+		hw->phy.multispeed_fiber = false;
+		if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) &&
+		   (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) ||
+		   ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) &&
+		   (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)))
+			hw->phy.multispeed_fiber = true;
+
+		/* Determine PHY vendor */
+		if (hw->phy.type != ixgbe_phy_nl) {
+			hw->phy.id = identifier;
+			status = hw->phy.ops.read_i2c_eeprom(hw,
+						    IXGBE_SFF_VENDOR_OUI_BYTE0,
+						    &oui_bytes[0]);
+
+			if (status == IXGBE_ERR_SWFW_SYNC ||
+			    status == IXGBE_ERR_I2C ||
+			    status == IXGBE_ERR_SFP_NOT_PRESENT)
+				goto err_read_i2c_eeprom;
+
+			status = hw->phy.ops.read_i2c_eeprom(hw,
+						    IXGBE_SFF_VENDOR_OUI_BYTE1,
+						    &oui_bytes[1]);
+
+			if (status == IXGBE_ERR_SWFW_SYNC ||
+			    status == IXGBE_ERR_I2C ||
+			    status == IXGBE_ERR_SFP_NOT_PRESENT)
+				goto err_read_i2c_eeprom;
+
+			status = hw->phy.ops.read_i2c_eeprom(hw,
+						    IXGBE_SFF_VENDOR_OUI_BYTE2,
+						    &oui_bytes[2]);
+
+			if (status == IXGBE_ERR_SWFW_SYNC ||
+			    status == IXGBE_ERR_I2C ||
+			    status == IXGBE_ERR_SFP_NOT_PRESENT)
+				goto err_read_i2c_eeprom;
+
+			vendor_oui =
+			  ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) |
+			   (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) |
+			   (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT));
+
+			switch (vendor_oui) {
+			case IXGBE_SFF_VENDOR_OUI_TYCO:
+				if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+					hw->phy.type =
+						    ixgbe_phy_sfp_passive_tyco;
+				break;
+			case IXGBE_SFF_VENDOR_OUI_FTL:
+				if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
+					hw->phy.type = ixgbe_phy_sfp_ftl_active;
+				else
+					hw->phy.type = ixgbe_phy_sfp_ftl;
+				break;
+			case IXGBE_SFF_VENDOR_OUI_AVAGO:
+				hw->phy.type = ixgbe_phy_sfp_avago;
+				break;
+			case IXGBE_SFF_VENDOR_OUI_INTEL:
+				hw->phy.type = ixgbe_phy_sfp_intel;
+				break;
+			default:
+				if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+					hw->phy.type =
+						 ixgbe_phy_sfp_passive_unknown;
+				else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
+					hw->phy.type =
+						ixgbe_phy_sfp_active_unknown;
+				else
+					hw->phy.type = ixgbe_phy_sfp_unknown;
+				break;
+			}
+		}
+
+		/* Allow any DA cable vendor */
+		if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE |
+		    IXGBE_SFF_DA_ACTIVE_CABLE)) {
+			status = 0;
+			goto out;
+		}
+
+		/* Verify supported 1G SFP modules */
+		if (comp_codes_10g == 0 &&
+		    !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+		      hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+		      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0  ||
+		      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
+			hw->phy.type = ixgbe_phy_sfp_unsupported;
+			status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+			goto out;
+		}
+
+		/* Anything else 82598-based is supported */
+		if (hw->mac.type == ixgbe_mac_82598EB) {
+			status = 0;
+			goto out;
+		}
+
+		ixgbe_get_device_caps(hw, &enforce_sfp);
+		if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) &&
+		    !((hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0) ||
+		      (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1) ||
+		      (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0)  ||
+		      (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1))) {
+			/* Make sure we're a supported PHY type */
+			if (hw->phy.type == ixgbe_phy_sfp_intel) {
+				status = 0;
+			} else {
+				if (hw->allow_unsupported_sfp == true) {
+					EWARN(hw, "WARNING: Intel (R) Network "
+					      "Connections are quality tested "
+					      "using Intel (R) Ethernet Optics."
+					      " Using untested modules is not "
+					      "supported and may cause unstable"
+					      " operation or damage to the "
+					      "module or the adapter. Intel "
+					      "Corporation is not responsible "
+					      "for any harm caused by using "
+					      "untested modules.\n", status);
+					status = 0;
+				} else {
+					hw_dbg(hw, "SFP+ module not supported\n");
+					hw->phy.type =
+						ixgbe_phy_sfp_unsupported;
+					status = IXGBE_ERR_SFP_NOT_SUPPORTED;
+				}
+			}
+		} else {
+			status = 0;
+		}
+	}
+
+out:
+	return status;
+
+err_read_i2c_eeprom:
+	hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+	if (hw->phy.type != ixgbe_phy_nl) {
+		hw->phy.id = 0;
+		hw->phy.type = ixgbe_phy_unknown;
+	}
+	return IXGBE_ERR_SFP_NOT_PRESENT;
+}
+
+/**
+ *  ixgbe_identify_qsfp_module_generic - Identifies QSFP modules
+ *  @hw: pointer to hardware structure
+ *
+ *  Searches for and identifies the QSFP module and assigns appropriate PHY type
+ **/
+s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) {
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		status = IXGBE_ERR_SFP_NOT_PRESENT;
+	}
+
+	return status;
+}
+
+
+/**
+ *  ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence
+ *  @hw: pointer to hardware structure
+ *  @list_offset: offset to the SFP ID list
+ *  @data_offset: offset to the SFP data block
+ *
+ *  Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if
+ *  so it returns the offsets to the phy init sequence block.
+ **/
+s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+					u16 *list_offset,
+					u16 *data_offset)
+{
+	u16 sfp_id;
+	u16 sfp_type = hw->phy.sfp_type;
+
+	if (hw->phy.sfp_type == ixgbe_sfp_type_unknown)
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+	if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
+		return IXGBE_ERR_SFP_NOT_PRESENT;
+
+	if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) &&
+	    (hw->phy.sfp_type == ixgbe_sfp_type_da_cu))
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+	/*
+	 * Limiting active cables and 1G Phys must be initialized as
+	 * SR modules
+	 */
+	if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
+	    sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+	    sfp_type == ixgbe_sfp_type_1g_sx_core0)
+		sfp_type = ixgbe_sfp_type_srlr_core0;
+	else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
+		 sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+		 sfp_type == ixgbe_sfp_type_1g_sx_core1)
+		sfp_type = ixgbe_sfp_type_srlr_core1;
+
+	/* Read offset to PHY init contents */
+	hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset);
+
+	if ((!*list_offset) || (*list_offset == 0xFFFF))
+		return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
+
+	/* Shift offset to first ID word */
+	(*list_offset)++;
+
+	/*
+	 * Find the matching SFP ID in the EEPROM
+	 * and program the init sequence
+	 */
+	hw->eeprom.ops.read(hw, *list_offset, &sfp_id);
+
+	while (sfp_id != IXGBE_PHY_INIT_END_NL) {
+		if (sfp_id == sfp_type) {
+			(*list_offset)++;
+			hw->eeprom.ops.read(hw, *list_offset, data_offset);
+			if ((!*data_offset) || (*data_offset == 0xFFFF)) {
+				hw_dbg(hw, "SFP+ module not supported\n");
+				return IXGBE_ERR_SFP_NOT_SUPPORTED;
+			} else {
+				break;
+			}
+		} else {
+			(*list_offset) += 2;
+			if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
+				return IXGBE_ERR_PHY;
+		}
+	}
+
+	if (sfp_id == IXGBE_PHY_INIT_END_NL) {
+		hw_dbg(hw, "No matching SFP+ module found\n");
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to read
+ *  @eeprom_data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				  u8 *eeprom_data)
+{
+	return hw->phy.ops.read_i2c_byte(hw, byte_offset,
+					 IXGBE_I2C_EEPROM_DEV_ADDR,
+					 eeprom_data);
+}
+
+/**
+ *  ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to write
+ *  @eeprom_data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				   u8 eeprom_data)
+{
+	return hw->phy.ops.write_i2c_byte(hw, byte_offset,
+					  IXGBE_I2C_EEPROM_DEV_ADDR,
+					  eeprom_data);
+}
+
+/**
+ *  ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ **/
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data)
+{
+	s32 status = 0;
+	u32 max_retry = 10;
+	u32 retry = 0;
+	u16 swfw_mask = 0;
+	bool nack = 1;
+	*data = 0;
+
+	if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+		swfw_mask = IXGBE_GSSR_PHY1_SM;
+	else
+		swfw_mask = IXGBE_GSSR_PHY0_SM;
+
+	do {
+		if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)
+		    != 0) {
+			status = IXGBE_ERR_SWFW_SYNC;
+			goto read_byte_out;
+		}
+
+		ixgbe_i2c_start(hw);
+
+		/* Device Address and write indication */
+		status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		ixgbe_i2c_start(hw);
+
+		/* Device Address and read indication */
+		status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1));
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_in_i2c_byte(hw, data);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_bit(hw, nack);
+		if (status != 0)
+			goto fail;
+
+		ixgbe_i2c_stop(hw);
+		break;
+
+fail:
+		hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		msleep(100);
+		ixgbe_i2c_bus_clear(hw);
+		retry++;
+		if (retry < max_retry)
+			hw_dbg(hw, "I2C byte read error - Retrying.\n");
+		else
+			hw_dbg(hw, "I2C byte read error.\n");
+
+	} while (retry < max_retry);
+
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+read_byte_out:
+	return status;
+}
+
+/**
+ *  ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data)
+{
+	s32 status = 0;
+	u32 max_retry = 1;
+	u32 retry = 0;
+	u16 swfw_mask = 0;
+
+	if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+		swfw_mask = IXGBE_GSSR_PHY1_SM;
+	else
+		swfw_mask = IXGBE_GSSR_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != 0) {
+		status = IXGBE_ERR_SWFW_SYNC;
+		goto write_byte_out;
+	}
+
+	do {
+		ixgbe_i2c_start(hw);
+
+		status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_byte(hw, data);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		ixgbe_i2c_stop(hw);
+		break;
+
+fail:
+		ixgbe_i2c_bus_clear(hw);
+		retry++;
+		if (retry < max_retry)
+			hw_dbg(hw, "I2C byte write error - Retrying.\n");
+		else
+			hw_dbg(hw, "I2C byte write error.\n");
+	} while (retry < max_retry);
+
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+write_byte_out:
+	return status;
+}
+
+/**
+ *  ixgbe_i2c_start - Sets I2C start condition
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets I2C start condition (High -> Low on SDA while SCL is High)
+ **/
+static void ixgbe_i2c_start(struct ixgbe_hw *hw)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+	/* Start condition must begin with data and clock high */
+	ixgbe_set_i2c_data(hw, &i2cctl, 1);
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Setup time for start condition (4.7us) */
+	udelay(IXGBE_I2C_T_SU_STA);
+
+	ixgbe_set_i2c_data(hw, &i2cctl, 0);
+
+	/* Hold time for start condition (4us) */
+	udelay(IXGBE_I2C_T_HD_STA);
+
+	ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	udelay(IXGBE_I2C_T_LOW);
+
+}
+
+/**
+ *  ixgbe_i2c_stop - Sets I2C stop condition
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets I2C stop condition (Low -> High on SDA while SCL is High)
+ **/
+static void ixgbe_i2c_stop(struct ixgbe_hw *hw)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+	/* Stop condition must begin with data low and clock high */
+	ixgbe_set_i2c_data(hw, &i2cctl, 0);
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Setup time for stop condition (4us) */
+	udelay(IXGBE_I2C_T_SU_STO);
+
+	ixgbe_set_i2c_data(hw, &i2cctl, 1);
+
+	/* bus free time between stop and start (4.7us)*/
+	udelay(IXGBE_I2C_T_BUF);
+}
+
+/**
+ *  ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C
+ *  @hw: pointer to hardware structure
+ *  @data: data byte to clock in
+ *
+ *  Clocks in one byte data via I2C data/clock
+ **/
+static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
+{
+	s32 i;
+	bool bit = 0;
+
+	for (i = 7; i >= 0; i--) {
+		ixgbe_clock_in_i2c_bit(hw, &bit);
+		*data |= bit << i;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C
+ *  @hw: pointer to hardware structure
+ *  @data: data byte clocked out
+ *
+ *  Clocks out one byte data via I2C data/clock
+ **/
+static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
+{
+	s32 status = 0;
+	s32 i;
+	u32 i2cctl;
+	bool bit = 0;
+
+	for (i = 7; i >= 0; i--) {
+		bit = (data >> i) & 0x1;
+		status = ixgbe_clock_out_i2c_bit(hw, bit);
+
+		if (status != 0)
+			break;
+	}
+
+	/* Release SDA line (set high) */
+	i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+	i2cctl |= IXGBE_I2C_DATA_OUT;
+	IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, i2cctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return status;
+}
+
+/**
+ *  ixgbe_get_i2c_ack - Polls for I2C ACK
+ *  @hw: pointer to hardware structure
+ *
+ *  Clocks in/out one bit via I2C data/clock
+ **/
+static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 i = 0;
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+	u32 timeout = 10;
+	bool ack = 1;
+
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+
+	/* Minimum high period of clock is 4us */
+	udelay(IXGBE_I2C_T_HIGH);
+
+	/* Poll for ACK.  Note that ACK in I2C spec is
+	 * transition from 1 to 0 */
+	for (i = 0; i < timeout; i++) {
+		i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+		ack = ixgbe_get_i2c_data(&i2cctl);
+
+		udelay(1);
+		if (ack == 0)
+			break;
+	}
+
+	if (ack == 1) {
+		hw_dbg(hw, "I2C ack was not received.\n");
+		status = IXGBE_ERR_I2C;
+	}
+
+	ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	udelay(IXGBE_I2C_T_LOW);
+
+	return status;
+}
+
+/**
+ *  ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
+ *  @hw: pointer to hardware structure
+ *  @data: read data value
+ *
+ *  Clocks in one bit via I2C data/clock
+ **/
+static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Minimum high period of clock is 4us */
+	udelay(IXGBE_I2C_T_HIGH);
+
+	i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+	*data = ixgbe_get_i2c_data(&i2cctl);
+
+	ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	udelay(IXGBE_I2C_T_LOW);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock
+ *  @hw: pointer to hardware structure
+ *  @data: data value to write
+ *
+ *  Clocks out one bit via I2C data/clock
+ **/
+static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data)
+{
+	s32 status;
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+
+	status = ixgbe_set_i2c_data(hw, &i2cctl, data);
+	if (status == 0) {
+		ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+		/* Minimum high period of clock is 4us */
+		udelay(IXGBE_I2C_T_HIGH);
+
+		ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+		/* Minimum low period of clock is 4.7 us.
+		 * This also takes care of the data hold time.
+		 */
+		udelay(IXGBE_I2C_T_LOW);
+	} else {
+		status = IXGBE_ERR_I2C;
+		hw_dbg(hw, "I2C data was not set to %X\n", data);
+	}
+
+	return status;
+}
+/**
+ *  ixgbe_raise_i2c_clk - Raises the I2C SCL clock
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Raises the I2C clock line '0'->'1'
+ **/
+static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+	u32 i = 0;
+	u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT;
+	u32 i2cctl_r = 0;
+
+	for (i = 0; i < timeout; i++) {
+		*i2cctl |= IXGBE_I2C_CLK_OUT;
+
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+		/* SCL rise time (1000ns) */
+		udelay(IXGBE_I2C_T_RISE);
+
+		i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+		if (i2cctl_r & IXGBE_I2C_CLK_IN)
+			break;
+	}
+}
+
+/**
+ *  ixgbe_lower_i2c_clk - Lowers the I2C SCL clock
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Lowers the I2C clock line '1'->'0'
+ **/
+static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+
+	*i2cctl &= ~IXGBE_I2C_CLK_OUT;
+
+	IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* SCL fall time (300ns) */
+	udelay(IXGBE_I2C_T_FALL);
+}
+
+/**
+ *  ixgbe_set_i2c_data - Sets the I2C data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *  @data: I2C data value (0 or 1) to set
+ *
+ *  Sets the I2C data bit
+ **/
+static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
+{
+	s32 status = 0;
+
+	if (data)
+		*i2cctl |= IXGBE_I2C_DATA_OUT;
+	else
+		*i2cctl &= ~IXGBE_I2C_DATA_OUT;
+
+	IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
+	udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA);
+
+	/* Verify data was set correctly */
+	*i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+	if (data != ixgbe_get_i2c_data(i2cctl)) {
+		status = IXGBE_ERR_I2C;
+		hw_dbg(hw, "Error - I2C data was not set to %X.\n", data);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_get_i2c_data - Reads the I2C SDA data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Returns the I2C data bit value
+ **/
+static bool ixgbe_get_i2c_data(u32 *i2cctl)
+{
+	bool data;
+
+	if (*i2cctl & IXGBE_I2C_DATA_IN)
+		data = 1;
+	else
+		data = 0;
+
+	return data;
+}
+
+/**
+ *  ixgbe_i2c_bus_clear - Clears the I2C bus
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the I2C bus by sending nine clock pulses.
+ *  Used when data line is stuck low.
+ **/
+void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL);
+	u32 i;
+
+	ixgbe_i2c_start(hw);
+
+	ixgbe_set_i2c_data(hw, &i2cctl, 1);
+
+	for (i = 0; i < 9; i++) {
+		ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+		/* Min high period of clock is 4us */
+		udelay(IXGBE_I2C_T_HIGH);
+
+		ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+		/* Min low period of clock is 4.7us*/
+		udelay(IXGBE_I2C_T_LOW);
+	}
+
+	ixgbe_i2c_start(hw);
+
+	/* Put the i2c bus back to default state */
+	ixgbe_i2c_stop(hw);
+}
+
+/**
+ *  ixgbe_tn_check_overtemp - Checks if an overtemp occurred.
+ *  @hw: pointer to hardware structure
+ *
+ *  Checks if the LASI temp alarm status was triggered due to overtemp
+ **/
+s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u16 phy_data = 0;
+
+	if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM)
+		goto out;
+
+	/* Check that the LASI temp alarm status was triggered */
+	hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG,
+			     IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data);
+
+	if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM))
+		goto out;
+
+	status = IXGBE_ERR_OVERTEMP;
+out:
+	return status;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
new file mode 100644
index 00000000..bbe5a9e3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
@@ -0,0 +1,137 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_PHY_H_
+#define _IXGBE_PHY_H_
+
+#include "ixgbe_type.h"
+#define IXGBE_I2C_EEPROM_DEV_ADDR    0xA0
+
+/* EEPROM byte offsets */
+#define IXGBE_SFF_IDENTIFIER		0x0
+#define IXGBE_SFF_IDENTIFIER_SFP	0x3
+#define IXGBE_SFF_VENDOR_OUI_BYTE0	0x25
+#define IXGBE_SFF_VENDOR_OUI_BYTE1	0x26
+#define IXGBE_SFF_VENDOR_OUI_BYTE2	0x27
+#define IXGBE_SFF_1GBE_COMP_CODES	0x6
+#define IXGBE_SFF_10GBE_COMP_CODES	0x3
+#define IXGBE_SFF_CABLE_TECHNOLOGY	0x8
+#define IXGBE_SFF_CABLE_SPEC_COMP	0x3C
+
+/* Bitmasks */
+#define IXGBE_SFF_DA_PASSIVE_CABLE	0x4
+#define IXGBE_SFF_DA_ACTIVE_CABLE	0x8
+#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING	0x4
+#define IXGBE_SFF_1GBASESX_CAPABLE	0x1
+#define IXGBE_SFF_1GBASELX_CAPABLE	0x2
+#define IXGBE_SFF_1GBASET_CAPABLE	0x8
+#define IXGBE_SFF_10GBASESR_CAPABLE	0x10
+#define IXGBE_SFF_10GBASELR_CAPABLE	0x20
+#define IXGBE_I2C_EEPROM_READ_MASK	0x100
+#define IXGBE_I2C_EEPROM_STATUS_MASK	0x3
+#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION	0x0
+#define IXGBE_I2C_EEPROM_STATUS_PASS	0x1
+#define IXGBE_I2C_EEPROM_STATUS_FAIL	0x2
+#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS	0x3
+
+/* Flow control defines */
+#define IXGBE_TAF_SYM_PAUSE		0x400
+#define IXGBE_TAF_ASM_PAUSE		0x800
+
+/* Bit-shift macros */
+#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT	24
+#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT	16
+#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT	8
+
+/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
+#define IXGBE_SFF_VENDOR_OUI_TYCO	0x00407600
+#define IXGBE_SFF_VENDOR_OUI_FTL	0x00906500
+#define IXGBE_SFF_VENDOR_OUI_AVAGO	0x00176A00
+#define IXGBE_SFF_VENDOR_OUI_INTEL	0x001B2100
+
+/* I2C SDA and SCL timing parameters for standard mode */
+#define IXGBE_I2C_T_HD_STA	4
+#define IXGBE_I2C_T_LOW		5
+#define IXGBE_I2C_T_HIGH	4
+#define IXGBE_I2C_T_SU_STA	5
+#define IXGBE_I2C_T_HD_DATA	5
+#define IXGBE_I2C_T_SU_DATA	1
+#define IXGBE_I2C_T_RISE	1
+#define IXGBE_I2C_T_FALL	1
+#define IXGBE_I2C_T_SU_STO	4
+#define IXGBE_I2C_T_BUF		5
+
+#define IXGBE_TN_LASI_STATUS_REG	0x9005
+#define IXGBE_TN_LASI_STATUS_TEMP_ALARM	0x0008
+
+s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw);
+bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr);
+enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id);
+s32 ixgbe_get_phy_id(struct ixgbe_hw *hw);
+s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
+s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
+s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+			       u32 device_type, u16 *phy_data);
+s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+				u32 device_type, u16 phy_data);
+s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
+s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+				       ixgbe_link_speed speed,
+				       bool autoneg,
+				       bool autoneg_wait_to_complete);
+s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+					       ixgbe_link_speed *speed,
+					       bool *autoneg);
+
+/* PHY specific */
+s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
+			     ixgbe_link_speed *speed,
+			     bool *link_up);
+s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
+s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
+				       u16 *firmware_version);
+s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw,
+					   u16 *firmware_version);
+
+s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
+s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+					u16 *list_offset,
+					u16 *data_offset);
+s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data);
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data);
+s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				  u8 *eeprom_data);
+s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				   u8 eeprom_data);
+void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw);
+#endif /* _IXGBE_PHY_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
new file mode 100644
index 00000000..5e3559fd
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
@@ -0,0 +1,73 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#ifndef _IXGBE_SRIOV_H_
+#define _IXGBE_SRIOV_H_
+
+int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
+			    int entries, u16 *hash_list, u32 vf);
+void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
+int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf);
+void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe);
+void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf);
+void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf);
+void ixgbe_msg_task(struct ixgbe_adapter *adapter);
+int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
+		     int vf, unsigned char *mac_addr);
+void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
+void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
+#ifdef IFLA_VF_MAX
+int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
+			  u8 qos);
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
+int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+#endif
+int ixgbe_ndo_get_vf_config(struct net_device *netdev,
+			    int vf, struct ifla_vf_info *ivi);
+#endif
+void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
+#ifdef CONFIG_PCI_IOV
+int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
+void ixgbe_enable_sriov(struct ixgbe_adapter *adapter);
+#endif
+int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
+#ifdef IFLA_VF_MAX
+void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
+#endif /* IFLA_VF_MAX */
+void ixgbe_dump_registers(struct ixgbe_adapter *adapter);
+
+/*
+ * These are defined in ixgbe_type.h on behalf of the VF driver
+ * but we need them here unwrapped for the PF driver.
+ */
+#define IXGBE_DEV_ID_82599_VF			0x10ED
+#define IXGBE_DEV_ID_X540_VF			0x1515
+
+#endif /* _IXGBE_SRIOV_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
new file mode 100644
index 00000000..6b21c879
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
@@ -0,0 +1,3254 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_TYPE_H_
+#define _IXGBE_TYPE_H_
+
+#include "ixgbe_osdep.h"
+
+
+/* Vendor ID */
+#define IXGBE_INTEL_VENDOR_ID			0x8086
+
+/* Device IDs */
+#define IXGBE_DEV_ID_82598			0x10B6
+#define IXGBE_DEV_ID_82598_BX			0x1508
+#define IXGBE_DEV_ID_82598AF_DUAL_PORT		0x10C6
+#define IXGBE_DEV_ID_82598AF_SINGLE_PORT	0x10C7
+#define IXGBE_DEV_ID_82598AT			0x10C8
+#define IXGBE_DEV_ID_82598AT2			0x150B
+#define IXGBE_DEV_ID_82598EB_SFP_LOM		0x10DB
+#define IXGBE_DEV_ID_82598EB_CX4		0x10DD
+#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT	0x10EC
+#define IXGBE_DEV_ID_82598_DA_DUAL_PORT		0x10F1
+#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM	0x10E1
+#define IXGBE_DEV_ID_82598EB_XF_LR		0x10F4
+#define IXGBE_DEV_ID_82599_KX4			0x10F7
+#define IXGBE_DEV_ID_82599_KX4_MEZZ		0x1514
+#define IXGBE_DEV_ID_82599_KR			0x1517
+#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE	0x10F8
+#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ	0x000C
+#define IXGBE_DEV_ID_82599_CX4			0x10F9
+#define IXGBE_DEV_ID_82599_SFP			0x10FB
+#define IXGBE_SUBDEV_ID_82599_SFP		0x11A9
+#define IXGBE_SUBDEV_ID_82599_560FLR		0x17D0
+#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE	0x152A
+#define IXGBE_DEV_ID_82599_SFP_FCOE		0x1529
+#define IXGBE_DEV_ID_82599_SFP_EM		0x1507
+#define IXGBE_DEV_ID_82599_SFP_SF2		0x154D
+#define IXGBE_DEV_ID_82599_QSFP_SF_QP		0x1558
+#define IXGBE_DEV_ID_82599EN_SFP		0x1557
+#define IXGBE_DEV_ID_82599_XAUI_LOM		0x10FC
+#define IXGBE_DEV_ID_82599_T3_LOM		0x151C
+#define IXGBE_DEV_ID_82599_LS			0x154F
+#define IXGBE_DEV_ID_X540T			0x1528
+
+/* General Registers */
+#define IXGBE_CTRL		0x00000
+#define IXGBE_STATUS		0x00008
+#define IXGBE_CTRL_EXT		0x00018
+#define IXGBE_ESDP		0x00020
+#define IXGBE_EODSDP		0x00028
+#define IXGBE_I2CCTL		0x00028
+#define IXGBE_PHY_GPIO		0x00028
+#define IXGBE_MAC_GPIO		0x00030
+#define IXGBE_PHYINT_STATUS0	0x00100
+#define IXGBE_PHYINT_STATUS1	0x00104
+#define IXGBE_PHYINT_STATUS2	0x00108
+#define IXGBE_LEDCTL		0x00200
+#define IXGBE_FRTIMER		0x00048
+#define IXGBE_TCPTIMER		0x0004C
+#define IXGBE_CORESPARE		0x00600
+#define IXGBE_EXVET		0x05078
+
+/* NVM Registers */
+#define IXGBE_EEC	0x10010
+#define IXGBE_EERD	0x10014
+#define IXGBE_EEWR	0x10018
+#define IXGBE_FLA	0x1001C
+#define IXGBE_EEMNGCTL	0x10110
+#define IXGBE_EEMNGDATA	0x10114
+#define IXGBE_FLMNGCTL	0x10118
+#define IXGBE_FLMNGDATA	0x1011C
+#define IXGBE_FLMNGCNT	0x10120
+#define IXGBE_FLOP	0x1013C
+#define IXGBE_GRC	0x10200
+#define IXGBE_SRAMREL	0x10210
+#define IXGBE_PHYDBG	0x10218
+
+/* General Receive Control */
+#define IXGBE_GRC_MNG	0x00000001 /* Manageability Enable */
+#define IXGBE_GRC_APME	0x00000002 /* APM enabled in EEPROM */
+
+#define IXGBE_VPDDIAG0	0x10204
+#define IXGBE_VPDDIAG1	0x10208
+
+/* I2CCTL Bit Masks */
+#define IXGBE_I2C_CLK_IN	0x00000001
+#define IXGBE_I2C_CLK_OUT	0x00000002
+#define IXGBE_I2C_DATA_IN	0x00000004
+#define IXGBE_I2C_DATA_OUT	0x00000008
+#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT	500
+
+#define IXGBE_I2C_THERMAL_SENSOR_ADDR	0xF8
+#define IXGBE_EMC_INTERNAL_DATA		0x00
+#define IXGBE_EMC_INTERNAL_THERM_LIMIT	0x20
+#define IXGBE_EMC_DIODE1_DATA		0x01
+#define IXGBE_EMC_DIODE1_THERM_LIMIT	0x19
+#define IXGBE_EMC_DIODE2_DATA		0x23
+#define IXGBE_EMC_DIODE2_THERM_LIMIT	0x1A
+
+#define IXGBE_MAX_SENSORS		3
+
+struct ixgbe_thermal_diode_data {
+	u8 location;
+	u8 temp;
+	u8 caution_thresh;
+	u8 max_op_thresh;
+};
+
+struct ixgbe_thermal_sensor_data {
+	struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS];
+};
+
+/* Interrupt Registers */
+#define IXGBE_EICR		0x00800
+#define IXGBE_EICS		0x00808
+#define IXGBE_EIMS		0x00880
+#define IXGBE_EIMC		0x00888
+#define IXGBE_EIAC		0x00810
+#define IXGBE_EIAM		0x00890
+#define IXGBE_EICS_EX(_i)	(0x00A90 + (_i) * 4)
+#define IXGBE_EIMS_EX(_i)	(0x00AA0 + (_i) * 4)
+#define IXGBE_EIMC_EX(_i)	(0x00AB0 + (_i) * 4)
+#define IXGBE_EIAM_EX(_i)	(0x00AD0 + (_i) * 4)
+/* 82599 EITR is only 12 bits, with the lower 3 always zero */
+/*
+ * 82598 EITR is 16 bits but set the limits based on the max
+ * supported by all ixgbe hardware
+ */
+#define IXGBE_MAX_INT_RATE	488281
+#define IXGBE_MIN_INT_RATE	956
+#define IXGBE_MAX_EITR		0x00000FF8
+#define IXGBE_MIN_EITR		8
+#define IXGBE_EITR(_i)		(((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \
+				 (0x012300 + (((_i) - 24) * 4)))
+#define IXGBE_EITR_ITR_INT_MASK	0x00000FF8
+#define IXGBE_EITR_LLI_MOD	0x00008000
+#define IXGBE_EITR_CNT_WDIS	0x80000000
+#define IXGBE_IVAR(_i)		(0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */
+#define IXGBE_IVAR_MISC		0x00A00 /* misc MSI-X interrupt causes */
+#define IXGBE_EITRSEL		0x00894
+#define IXGBE_MSIXT		0x00000 /* MSI-X Table. 0x0000 - 0x01C */
+#define IXGBE_MSIXPBA		0x02000 /* MSI-X Pending bit array */
+#define IXGBE_PBACL(_i)	(((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4)))
+#define IXGBE_GPIE		0x00898
+
+/* Flow Control Registers */
+#define IXGBE_FCADBUL		0x03210
+#define IXGBE_FCADBUH		0x03214
+#define IXGBE_FCAMACL		0x04328
+#define IXGBE_FCAMACH		0x0432C
+#define IXGBE_FCRTH_82599(_i)	(0x03260 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_FCRTL_82599(_i)	(0x03220 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_PFCTOP		0x03008
+#define IXGBE_FCTTV(_i)		(0x03200 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_FCRTL(_i)		(0x03220 + ((_i) * 8)) /* 8 of these (0-7) */
+#define IXGBE_FCRTH(_i)		(0x03260 + ((_i) * 8)) /* 8 of these (0-7) */
+#define IXGBE_FCRTV		0x032A0
+#define IXGBE_FCCFG		0x03D00
+#define IXGBE_TFCS		0x0CE00
+
+/* Receive DMA Registers */
+#define IXGBE_RDBAL(_i)	(((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \
+			 (0x0D000 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDBAH(_i)	(((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \
+			 (0x0D004 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDLEN(_i)	(((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \
+			 (0x0D008 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDH(_i)	(((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \
+			 (0x0D010 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDT(_i)	(((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \
+			 (0x0D018 + (((_i) - 64) * 0x40)))
+#define IXGBE_RXDCTL(_i)	(((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \
+				 (0x0D028 + (((_i) - 64) * 0x40)))
+#define IXGBE_RSCCTL(_i)	(((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \
+				 (0x0D02C + (((_i) - 64) * 0x40)))
+#define IXGBE_RSCDBU	0x03028
+#define IXGBE_RDDCC	0x02F20
+#define IXGBE_RXMEMWRAP	0x03190
+#define IXGBE_STARCTRL	0x03024
+/*
+ * Split and Replication Receive Control Registers
+ * 00-15 : 0x02100 + n*4
+ * 16-64 : 0x01014 + n*0x40
+ * 64-127: 0x0D014 + (n-64)*0x40
+ */
+#define IXGBE_SRRCTL(_i)	(((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \
+				 (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \
+				 (0x0D014 + (((_i) - 64) * 0x40))))
+/*
+ * Rx DCA Control Register:
+ * 00-15 : 0x02200 + n*4
+ * 16-64 : 0x0100C + n*0x40
+ * 64-127: 0x0D00C + (n-64)*0x40
+ */
+#define IXGBE_DCA_RXCTRL(_i)	(((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \
+				 (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \
+				 (0x0D00C + (((_i) - 64) * 0x40))))
+#define IXGBE_RDRXCTL		0x02F00
+#define IXGBE_RDRXCTL_RSC_PUSH	0x80
+/* 8 of these 0x03C00 - 0x03C1C */
+#define IXGBE_RXPBSIZE(_i)	(0x03C00 + ((_i) * 4))
+#define IXGBE_RXCTRL		0x03000
+#define IXGBE_DROPEN		0x03D04
+#define IXGBE_RXPBSIZE_SHIFT	10
+
+/* Receive Registers */
+#define IXGBE_RXCSUM		0x05000
+#define IXGBE_RFCTL		0x05008
+#define IXGBE_DRECCCTL		0x02F08
+#define IXGBE_DRECCCTL_DISABLE	0
+#define IXGBE_DRECCCTL2		0x02F8C
+
+/* Multicast Table Array - 128 entries */
+#define IXGBE_MTA(_i)		(0x05200 + ((_i) * 4))
+#define IXGBE_RAL(_i)		(((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+				 (0x0A200 + ((_i) * 8)))
+#define IXGBE_RAH(_i)		(((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+				 (0x0A204 + ((_i) * 8)))
+#define IXGBE_MPSAR_LO(_i)	(0x0A600 + ((_i) * 8))
+#define IXGBE_MPSAR_HI(_i)	(0x0A604 + ((_i) * 8))
+/* Packet split receive type */
+#define IXGBE_PSRTYPE(_i)	(((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \
+				 (0x0EA00 + ((_i) * 4)))
+/* array of 4096 1-bit vlan filters */
+#define IXGBE_VFTA(_i)		(0x0A000 + ((_i) * 4))
+/*array of 4096 4-bit vlan vmdq indices */
+#define IXGBE_VFTAVIND(_j, _i)	(0x0A200 + ((_j) * 0x200) + ((_i) * 4))
+#define IXGBE_FCTRL		0x05080
+#define IXGBE_VLNCTRL		0x05088
+#define IXGBE_MCSTCTRL		0x05090
+#define IXGBE_MRQC		0x05818
+#define IXGBE_SAQF(_i)	(0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */
+#define IXGBE_DAQF(_i)	(0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */
+#define IXGBE_SDPQF(_i)	(0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */
+#define IXGBE_FTQF(_i)	(0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */
+#define IXGBE_ETQF(_i)	(0x05128 + ((_i) * 4)) /* EType Queue Filter */
+#define IXGBE_ETQS(_i)	(0x0EC00 + ((_i) * 4)) /* EType Queue Select */
+#define IXGBE_SYNQF	0x0EC30 /* SYN Packet Queue Filter */
+#define IXGBE_RQTC	0x0EC70
+#define IXGBE_MTQC	0x08120
+#define IXGBE_VLVF(_i)	(0x0F100 + ((_i) * 4))  /* 64 of these (0-63) */
+#define IXGBE_VLVFB(_i)	(0x0F200 + ((_i) * 4))  /* 128 of these (0-127) */
+#define IXGBE_VMVIR(_i)	(0x08000 + ((_i) * 4))  /* 64 of these (0-63) */
+#define IXGBE_VT_CTL		0x051B0
+#define IXGBE_PFMAILBOX(_i)	(0x04B00 + (4 * (_i))) /* 64 total */
+/* 64 Mailboxes, 16 DW each */
+#define IXGBE_PFMBMEM(_i)	(0x13000 + (64 * (_i)))
+#define IXGBE_PFMBICR(_i)	(0x00710 + (4 * (_i))) /* 4 total */
+#define IXGBE_PFMBIMR(_i)	(0x00720 + (4 * (_i))) /* 4 total */
+#define IXGBE_VFRE(_i)		(0x051E0 + ((_i) * 4))
+#define IXGBE_VFTE(_i)		(0x08110 + ((_i) * 4))
+#define IXGBE_VMECM(_i)		(0x08790 + ((_i) * 4))
+#define IXGBE_QDE		0x2F04
+#define IXGBE_VMTXSW(_i)	(0x05180 + ((_i) * 4)) /* 2 total */
+#define IXGBE_VMOLR(_i)		(0x0F000 + ((_i) * 4)) /* 64 total */
+#define IXGBE_UTA(_i)		(0x0F400 + ((_i) * 4))
+#define IXGBE_MRCTL(_i)		(0x0F600 + ((_i) * 4))
+#define IXGBE_VMRVLAN(_i)	(0x0F610 + ((_i) * 4))
+#define IXGBE_VMRVM(_i)		(0x0F630 + ((_i) * 4))
+#define IXGBE_L34T_IMIR(_i)	(0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/
+#define IXGBE_RXFECCERR0	0x051B8
+#define IXGBE_LLITHRESH		0x0EC90
+#define IXGBE_IMIR(_i)		(0x05A80 + ((_i) * 4))  /* 8 of these (0-7) */
+#define IXGBE_IMIREXT(_i)	(0x05AA0 + ((_i) * 4))  /* 8 of these (0-7) */
+#define IXGBE_IMIRVP		0x05AC0
+#define IXGBE_VMD_CTL		0x0581C
+#define IXGBE_RETA(_i)		(0x05C00 + ((_i) * 4))  /* 32 of these (0-31) */
+#define IXGBE_RSSRK(_i)		(0x05C80 + ((_i) * 4))  /* 10 of these (0-9) */
+
+/* Flow Director registers */
+#define IXGBE_FDIRCTRL	0x0EE00
+#define IXGBE_FDIRHKEY	0x0EE68
+#define IXGBE_FDIRSKEY	0x0EE6C
+#define IXGBE_FDIRDIP4M	0x0EE3C
+#define IXGBE_FDIRSIP4M	0x0EE40
+#define IXGBE_FDIRTCPM	0x0EE44
+#define IXGBE_FDIRUDPM	0x0EE48
+#define IXGBE_FDIRIP6M	0x0EE74
+#define IXGBE_FDIRM	0x0EE70
+
+/* Flow Director Stats registers */
+#define IXGBE_FDIRFREE	0x0EE38
+#define IXGBE_FDIRLEN	0x0EE4C
+#define IXGBE_FDIRUSTAT	0x0EE50
+#define IXGBE_FDIRFSTAT	0x0EE54
+#define IXGBE_FDIRMATCH	0x0EE58
+#define IXGBE_FDIRMISS	0x0EE5C
+
+/* Flow Director Programming registers */
+#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */
+#define IXGBE_FDIRIPSA	0x0EE18
+#define IXGBE_FDIRIPDA	0x0EE1C
+#define IXGBE_FDIRPORT	0x0EE20
+#define IXGBE_FDIRVLAN	0x0EE24
+#define IXGBE_FDIRHASH	0x0EE28
+#define IXGBE_FDIRCMD	0x0EE2C
+
+/* Transmit DMA registers */
+#define IXGBE_TDBAL(_i)		(0x06000 + ((_i) * 0x40)) /* 32 of them (0-31)*/
+#define IXGBE_TDBAH(_i)		(0x06004 + ((_i) * 0x40))
+#define IXGBE_TDLEN(_i)		(0x06008 + ((_i) * 0x40))
+#define IXGBE_TDH(_i)		(0x06010 + ((_i) * 0x40))
+#define IXGBE_TDT(_i)		(0x06018 + ((_i) * 0x40))
+#define IXGBE_TXDCTL(_i)	(0x06028 + ((_i) * 0x40))
+#define IXGBE_TDWBAL(_i)	(0x06038 + ((_i) * 0x40))
+#define IXGBE_TDWBAH(_i)	(0x0603C + ((_i) * 0x40))
+#define IXGBE_DTXCTL		0x07E00
+
+#define IXGBE_DMATXCTL		0x04A80
+#define IXGBE_PFVFSPOOF(_i)	(0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */
+#define IXGBE_PFDTXGSWC		0x08220
+#define IXGBE_DTXMXSZRQ		0x08100
+#define IXGBE_DTXTCPFLGL	0x04A88
+#define IXGBE_DTXTCPFLGH	0x04A8C
+#define IXGBE_LBDRPEN		0x0CA00
+#define IXGBE_TXPBTHRESH(_i)	(0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */
+
+#define IXGBE_DMATXCTL_TE	0x1 /* Transmit Enable */
+#define IXGBE_DMATXCTL_NS	0x2 /* No Snoop LSO hdr buffer */
+#define IXGBE_DMATXCTL_GDV	0x8 /* Global Double VLAN */
+#define IXGBE_DMATXCTL_VT_SHIFT	16  /* VLAN EtherType */
+
+#define IXGBE_PFDTXGSWC_VT_LBEN	0x1 /* Local L2 VT switch enable */
+
+/* Anti-spoofing defines */
+#define IXGBE_SPOOF_MACAS_MASK		0xFF
+#define IXGBE_SPOOF_VLANAS_MASK		0xFF00
+#define IXGBE_SPOOF_VLANAS_SHIFT	8
+#define IXGBE_PFVFSPOOF_REG_COUNT	8
+/* 16 of these (0-15) */
+#define IXGBE_DCA_TXCTRL(_i)		(0x07200 + ((_i) * 4))
+/* Tx DCA Control register : 128 of these (0-127) */
+#define IXGBE_DCA_TXCTRL_82599(_i)	(0x0600C + ((_i) * 0x40))
+#define IXGBE_TIPG			0x0CB00
+#define IXGBE_TXPBSIZE(_i)		(0x0CC00 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_MNGTXMAP			0x0CD10
+#define IXGBE_TIPG_FIBER_DEFAULT	3
+#define IXGBE_TXPBSIZE_SHIFT		10
+
+/* Wake up registers */
+#define IXGBE_WUC	0x05800
+#define IXGBE_WUFC	0x05808
+#define IXGBE_WUS	0x05810
+#define IXGBE_IPAV	0x05838
+#define IXGBE_IP4AT	0x05840 /* IPv4 table 0x5840-0x5858 */
+#define IXGBE_IP6AT	0x05880 /* IPv6 table 0x5880-0x588F */
+
+#define IXGBE_WUPL	0x05900
+#define IXGBE_WUPM	0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */
+#define IXGBE_FHFT(_n)	(0x09000 + (_n * 0x100)) /* Flex host filter table */
+/* Ext Flexible Host Filter Table */
+#define IXGBE_FHFT_EXT(_n)	(0x09800 + (_n * 0x100))
+
+#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX		4
+#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX	2
+
+/* Each Flexible Filter is at most 128 (0x80) bytes in length */
+#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX		128
+#define IXGBE_FHFT_LENGTH_OFFSET		0xFC  /* Length byte in FHFT */
+#define IXGBE_FHFT_LENGTH_MASK			0x0FF /* Length in lower byte */
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define IXGBE_WUC_PME_EN	0x00000002 /* PME Enable */
+#define IXGBE_WUC_PME_STATUS	0x00000004 /* PME Status */
+#define IXGBE_WUC_WKEN		0x00000010 /* Enable PE_WAKE_N pin assertion  */
+
+/* Wake Up Filter Control */
+#define IXGBE_WUFC_LNKC	0x00000001 /* Link Status Change Wakeup Enable */
+#define IXGBE_WUFC_MAG	0x00000002 /* Magic Packet Wakeup Enable */
+#define IXGBE_WUFC_EX	0x00000004 /* Directed Exact Wakeup Enable */
+#define IXGBE_WUFC_MC	0x00000008 /* Directed Multicast Wakeup Enable */
+#define IXGBE_WUFC_BC	0x00000010 /* Broadcast Wakeup Enable */
+#define IXGBE_WUFC_ARP	0x00000020 /* ARP Request Packet Wakeup Enable */
+#define IXGBE_WUFC_IPV4	0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define IXGBE_WUFC_IPV6	0x00000080 /* Directed IPv6 Packet Wakeup Enable */
+#define IXGBE_WUFC_MNG	0x00000100 /* Directed Mgmt Packet Wakeup Enable */
+
+#define IXGBE_WUFC_IGNORE_TCO	0x00008000 /* Ignore WakeOn TCO packets */
+#define IXGBE_WUFC_FLX0	0x00010000 /* Flexible Filter 0 Enable */
+#define IXGBE_WUFC_FLX1	0x00020000 /* Flexible Filter 1 Enable */
+#define IXGBE_WUFC_FLX2	0x00040000 /* Flexible Filter 2 Enable */
+#define IXGBE_WUFC_FLX3	0x00080000 /* Flexible Filter 3 Enable */
+#define IXGBE_WUFC_FLX4	0x00100000 /* Flexible Filter 4 Enable */
+#define IXGBE_WUFC_FLX5	0x00200000 /* Flexible Filter 5 Enable */
+#define IXGBE_WUFC_FLX_FILTERS	0x000F0000 /* Mask for 4 flex filters */
+/* Mask for Ext. flex filters */
+#define IXGBE_WUFC_EXT_FLX_FILTERS	0x00300000
+#define IXGBE_WUFC_ALL_FILTERS	0x003F00FF /* Mask for all wakeup filters */
+#define IXGBE_WUFC_FLX_OFFSET	16 /* Offset to the Flexible Filters bits */
+
+/* Wake Up Status */
+#define IXGBE_WUS_LNKC		IXGBE_WUFC_LNKC
+#define IXGBE_WUS_MAG		IXGBE_WUFC_MAG
+#define IXGBE_WUS_EX		IXGBE_WUFC_EX
+#define IXGBE_WUS_MC		IXGBE_WUFC_MC
+#define IXGBE_WUS_BC		IXGBE_WUFC_BC
+#define IXGBE_WUS_ARP		IXGBE_WUFC_ARP
+#define IXGBE_WUS_IPV4		IXGBE_WUFC_IPV4
+#define IXGBE_WUS_IPV6		IXGBE_WUFC_IPV6
+#define IXGBE_WUS_MNG		IXGBE_WUFC_MNG
+#define IXGBE_WUS_FLX0		IXGBE_WUFC_FLX0
+#define IXGBE_WUS_FLX1		IXGBE_WUFC_FLX1
+#define IXGBE_WUS_FLX2		IXGBE_WUFC_FLX2
+#define IXGBE_WUS_FLX3		IXGBE_WUFC_FLX3
+#define IXGBE_WUS_FLX4		IXGBE_WUFC_FLX4
+#define IXGBE_WUS_FLX5		IXGBE_WUFC_FLX5
+#define IXGBE_WUS_FLX_FILTERS	IXGBE_WUFC_FLX_FILTERS
+
+/* Wake Up Packet Length */
+#define IXGBE_WUPL_LENGTH_MASK	0xFFFF
+
+/* DCB registers */
+#define IXGBE_DCB_MAX_TRAFFIC_CLASS	8
+#define IXGBE_RMCS		0x03D00
+#define IXGBE_DPMCS		0x07F40
+#define IXGBE_PDPMCS		0x0CD00
+#define IXGBE_RUPPBMR		0x050A0
+#define IXGBE_RT2CR(_i)		(0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RT2SR(_i)		(0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TDTQ2TCCR(_i)	(0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */
+#define IXGBE_TDTQ2TCSR(_i)	(0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */
+#define IXGBE_TDPT2TCCR(_i)	(0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TDPT2TCSR(_i)	(0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
+
+
+/* Security Control Registers */
+#define IXGBE_SECTXCTRL		0x08800
+#define IXGBE_SECTXSTAT		0x08804
+#define IXGBE_SECTXBUFFAF	0x08808
+#define IXGBE_SECTXMINIFG	0x08810
+#define IXGBE_SECRXCTRL		0x08D00
+#define IXGBE_SECRXSTAT		0x08D04
+
+/* Security Bit Fields and Masks */
+#define IXGBE_SECTXCTRL_SECTX_DIS	0x00000001
+#define IXGBE_SECTXCTRL_TX_DIS		0x00000002
+#define IXGBE_SECTXCTRL_STORE_FORWARD	0x00000004
+
+#define IXGBE_SECTXSTAT_SECTX_RDY	0x00000001
+#define IXGBE_SECTXSTAT_ECC_TXERR	0x00000002
+
+#define IXGBE_SECRXCTRL_SECRX_DIS	0x00000001
+#define IXGBE_SECRXCTRL_RX_DIS		0x00000002
+
+#define IXGBE_SECRXSTAT_SECRX_RDY	0x00000001
+#define IXGBE_SECRXSTAT_ECC_RXERR	0x00000002
+
+/* LinkSec (MacSec) Registers */
+#define IXGBE_LSECTXCAP		0x08A00
+#define IXGBE_LSECRXCAP		0x08F00
+#define IXGBE_LSECTXCTRL	0x08A04
+#define IXGBE_LSECTXSCL		0x08A08 /* SCI Low */
+#define IXGBE_LSECTXSCH		0x08A0C /* SCI High */
+#define IXGBE_LSECTXSA		0x08A10
+#define IXGBE_LSECTXPN0		0x08A14
+#define IXGBE_LSECTXPN1		0x08A18
+#define IXGBE_LSECTXKEY0(_n)	(0x08A1C + (4 * (_n))) /* 4 of these (0-3) */
+#define IXGBE_LSECTXKEY1(_n)	(0x08A2C + (4 * (_n))) /* 4 of these (0-3) */
+#define IXGBE_LSECRXCTRL	0x08F04
+#define IXGBE_LSECRXSCL		0x08F08
+#define IXGBE_LSECRXSCH		0x08F0C
+#define IXGBE_LSECRXSA(_i)	(0x08F10 + (4 * (_i))) /* 2 of these (0-1) */
+#define IXGBE_LSECRXPN(_i)	(0x08F18 + (4 * (_i))) /* 2 of these (0-1) */
+#define IXGBE_LSECRXKEY(_n, _m)	(0x08F20 + ((0x10 * (_n)) + (4 * (_m))))
+#define IXGBE_LSECTXUT		0x08A3C /* OutPktsUntagged */
+#define IXGBE_LSECTXPKTE	0x08A40 /* OutPktsEncrypted */
+#define IXGBE_LSECTXPKTP	0x08A44 /* OutPktsProtected */
+#define IXGBE_LSECTXOCTE	0x08A48 /* OutOctetsEncrypted */
+#define IXGBE_LSECTXOCTP	0x08A4C /* OutOctetsProtected */
+#define IXGBE_LSECRXUT		0x08F40 /* InPktsUntagged/InPktsNoTag */
+#define IXGBE_LSECRXOCTD	0x08F44 /* InOctetsDecrypted */
+#define IXGBE_LSECRXOCTV	0x08F48 /* InOctetsValidated */
+#define IXGBE_LSECRXBAD		0x08F4C /* InPktsBadTag */
+#define IXGBE_LSECRXNOSCI	0x08F50 /* InPktsNoSci */
+#define IXGBE_LSECRXUNSCI	0x08F54 /* InPktsUnknownSci */
+#define IXGBE_LSECRXUNCH	0x08F58 /* InPktsUnchecked */
+#define IXGBE_LSECRXDELAY	0x08F5C /* InPktsDelayed */
+#define IXGBE_LSECRXLATE	0x08F60 /* InPktsLate */
+#define IXGBE_LSECRXOK(_n)	(0x08F64 + (0x04 * (_n))) /* InPktsOk */
+#define IXGBE_LSECRXINV(_n)	(0x08F6C + (0x04 * (_n))) /* InPktsInvalid */
+#define IXGBE_LSECRXNV(_n)	(0x08F74 + (0x04 * (_n))) /* InPktsNotValid */
+#define IXGBE_LSECRXUNSA	0x08F7C /* InPktsUnusedSa */
+#define IXGBE_LSECRXNUSA	0x08F80 /* InPktsNotUsingSa */
+
+/* LinkSec (MacSec) Bit Fields and Masks */
+#define IXGBE_LSECTXCAP_SUM_MASK	0x00FF0000
+#define IXGBE_LSECTXCAP_SUM_SHIFT	16
+#define IXGBE_LSECRXCAP_SUM_MASK	0x00FF0000
+#define IXGBE_LSECRXCAP_SUM_SHIFT	16
+
+#define IXGBE_LSECTXCTRL_EN_MASK	0x00000003
+#define IXGBE_LSECTXCTRL_DISABLE	0x0
+#define IXGBE_LSECTXCTRL_AUTH		0x1
+#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT	0x2
+#define IXGBE_LSECTXCTRL_AISCI		0x00000020
+#define IXGBE_LSECTXCTRL_PNTHRSH_MASK	0xFFFFFF00
+#define IXGBE_LSECTXCTRL_RSV_MASK	0x000000D8
+
+#define IXGBE_LSECRXCTRL_EN_MASK	0x0000000C
+#define IXGBE_LSECRXCTRL_EN_SHIFT	2
+#define IXGBE_LSECRXCTRL_DISABLE	0x0
+#define IXGBE_LSECRXCTRL_CHECK		0x1
+#define IXGBE_LSECRXCTRL_STRICT		0x2
+#define IXGBE_LSECRXCTRL_DROP		0x3
+#define IXGBE_LSECRXCTRL_PLSH		0x00000040
+#define IXGBE_LSECRXCTRL_RP		0x00000080
+#define IXGBE_LSECRXCTRL_RSV_MASK	0xFFFFFF33
+
+/* IpSec Registers */
+#define IXGBE_IPSTXIDX		0x08900
+#define IXGBE_IPSTXSALT		0x08904
+#define IXGBE_IPSTXKEY(_i)	(0x08908 + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXIDX		0x08E00
+#define IXGBE_IPSRXIPADDR(_i)	(0x08E04 + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXSPI		0x08E14
+#define IXGBE_IPSRXIPIDX	0x08E18
+#define IXGBE_IPSRXKEY(_i)	(0x08E1C + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXSALT		0x08E2C
+#define IXGBE_IPSRXMOD		0x08E30
+
+#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE	0x4
+
+/* DCB registers */
+#define IXGBE_RTRPCS		0x02430
+#define IXGBE_RTTDCS		0x04900
+#define IXGBE_RTTDCS_ARBDIS	0x00000040 /* DCB arbiter disable */
+#define IXGBE_RTTPCS		0x0CD00
+#define IXGBE_RTRUP2TC		0x03020
+#define IXGBE_RTTUP2TC		0x0C800
+#define IXGBE_RTRPT4C(_i)	(0x02140 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TXLLQ(_i)		(0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_RTRPT4S(_i)	(0x02160 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDT2C(_i)	(0x04910 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDT2S(_i)	(0x04930 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTPT2C(_i)	(0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTPT2S(_i)	(0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDQSEL		0x04904
+#define IXGBE_RTTDT1C		0x04908
+#define IXGBE_RTTDT1S		0x0490C
+#define IXGBE_RTTDTECC		0x04990
+#define IXGBE_RTTDTECC_NO_BCN	0x00000100
+
+#define IXGBE_RTTBCNRC			0x04984
+#define IXGBE_RTTBCNRC_RS_ENA		0x80000000
+#define IXGBE_RTTBCNRC_RF_DEC_MASK	0x00003FFF
+#define IXGBE_RTTBCNRC_RF_INT_SHIFT	14
+#define IXGBE_RTTBCNRC_RF_INT_MASK \
+	(IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT)
+#define IXGBE_RTTBCNRM	0x04980
+
+/* FCoE DMA Context Registers */
+#define IXGBE_FCPTRL		0x02410 /* FC User Desc. PTR Low */
+#define IXGBE_FCPTRH		0x02414 /* FC USer Desc. PTR High */
+#define IXGBE_FCBUFF		0x02418 /* FC Buffer Control */
+#define IXGBE_FCDMARW		0x02420 /* FC Receive DMA RW */
+#define IXGBE_FCINVST0		0x03FC0 /* FC Invalid DMA Context Status Reg 0*/
+#define IXGBE_FCINVST(_i)	(IXGBE_FCINVST0 + ((_i) * 4))
+#define IXGBE_FCBUFF_VALID	(1 << 0)   /* DMA Context Valid */
+#define IXGBE_FCBUFF_BUFFSIZE	(3 << 3)   /* User Buffer Size */
+#define IXGBE_FCBUFF_WRCONTX	(1 << 7)   /* 0: Initiator, 1: Target */
+#define IXGBE_FCBUFF_BUFFCNT	0x0000ff00 /* Number of User Buffers */
+#define IXGBE_FCBUFF_OFFSET	0xffff0000 /* User Buffer Offset */
+#define IXGBE_FCBUFF_BUFFSIZE_SHIFT	3
+#define IXGBE_FCBUFF_BUFFCNT_SHIFT	8
+#define IXGBE_FCBUFF_OFFSET_SHIFT	16
+#define IXGBE_FCDMARW_WE		(1 << 14)   /* Write enable */
+#define IXGBE_FCDMARW_RE		(1 << 15)   /* Read enable */
+#define IXGBE_FCDMARW_FCOESEL		0x000001ff  /* FC X_ID: 11 bits */
+#define IXGBE_FCDMARW_LASTSIZE		0xffff0000  /* Last User Buffer Size */
+#define IXGBE_FCDMARW_LASTSIZE_SHIFT	16
+/* FCoE SOF/EOF */
+#define IXGBE_TEOFF		0x04A94 /* Tx FC EOF */
+#define IXGBE_TSOFF		0x04A98 /* Tx FC SOF */
+#define IXGBE_REOFF		0x05158 /* Rx FC EOF */
+#define IXGBE_RSOFF		0x051F8 /* Rx FC SOF */
+/* FCoE Filter Context Registers */
+#define IXGBE_FCFLT		0x05108 /* FC FLT Context */
+#define IXGBE_FCFLTRW		0x05110 /* FC Filter RW Control */
+#define IXGBE_FCPARAM		0x051d8 /* FC Offset Parameter */
+#define IXGBE_FCFLT_VALID	(1 << 0)   /* Filter Context Valid */
+#define IXGBE_FCFLT_FIRST	(1 << 1)   /* Filter First */
+#define IXGBE_FCFLT_SEQID	0x00ff0000 /* Sequence ID */
+#define IXGBE_FCFLT_SEQCNT	0xff000000 /* Sequence Count */
+#define IXGBE_FCFLTRW_RVALDT	(1 << 13)  /* Fast Re-Validation */
+#define IXGBE_FCFLTRW_WE	(1 << 14)  /* Write Enable */
+#define IXGBE_FCFLTRW_RE	(1 << 15)  /* Read Enable */
+/* FCoE Receive Control */
+#define IXGBE_FCRXCTRL		0x05100 /* FC Receive Control */
+#define IXGBE_FCRXCTRL_FCOELLI	(1 << 0)   /* Low latency interrupt */
+#define IXGBE_FCRXCTRL_SAVBAD	(1 << 1)   /* Save Bad Frames */
+#define IXGBE_FCRXCTRL_FRSTRDH	(1 << 2)   /* EN 1st Read Header */
+#define IXGBE_FCRXCTRL_LASTSEQH	(1 << 3)   /* EN Last Header in Seq */
+#define IXGBE_FCRXCTRL_ALLH	(1 << 4)   /* EN All Headers */
+#define IXGBE_FCRXCTRL_FRSTSEQH	(1 << 5)   /* EN 1st Seq. Header */
+#define IXGBE_FCRXCTRL_ICRC	(1 << 6)   /* Ignore Bad FC CRC */
+#define IXGBE_FCRXCTRL_FCCRCBO	(1 << 7)   /* FC CRC Byte Ordering */
+#define IXGBE_FCRXCTRL_FCOEVER	0x00000f00 /* FCoE Version: 4 bits */
+#define IXGBE_FCRXCTRL_FCOEVER_SHIFT	8
+/* FCoE Redirection */
+#define IXGBE_FCRECTL		0x0ED00 /* FC Redirection Control */
+#define IXGBE_FCRETA0		0x0ED10 /* FC Redirection Table 0 */
+#define IXGBE_FCRETA(_i)	(IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */
+#define IXGBE_FCRECTL_ENA	0x1 /* FCoE Redir Table Enable */
+#define IXGBE_FCRETASEL_ENA	0x2 /* FCoE FCRETASEL bit */
+#define IXGBE_FCRETA_SIZE	8 /* Max entries in FCRETA */
+#define IXGBE_FCRETA_ENTRY_MASK	0x0000007f /* 7 bits for the queue index */
+
+/* Stats registers */
+#define IXGBE_CRCERRS	0x04000
+#define IXGBE_ILLERRC	0x04004
+#define IXGBE_ERRBC	0x04008
+#define IXGBE_MSPDC	0x04010
+#define IXGBE_MPC(_i)	(0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/
+#define IXGBE_MLFC	0x04034
+#define IXGBE_MRFC	0x04038
+#define IXGBE_RLEC	0x04040
+#define IXGBE_LXONTXC	0x03F60
+#define IXGBE_LXONRXC	0x0CF60
+#define IXGBE_LXOFFTXC	0x03F68
+#define IXGBE_LXOFFRXC	0x0CF68
+#define IXGBE_LXONRXCNT		0x041A4
+#define IXGBE_LXOFFRXCNT	0x041A8
+#define IXGBE_PXONRXCNT(_i)	(0x04140 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXOFFRXCNT(_i)	(0x04160 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXON2OFFCNT(_i)	(0x03240 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXONTXC(_i)	(0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/
+#define IXGBE_PXONRXC(_i)	(0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/
+#define IXGBE_PXOFFTXC(_i)	(0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/
+#define IXGBE_PXOFFRXC(_i)	(0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/
+#define IXGBE_PRC64		0x0405C
+#define IXGBE_PRC127		0x04060
+#define IXGBE_PRC255		0x04064
+#define IXGBE_PRC511		0x04068
+#define IXGBE_PRC1023		0x0406C
+#define IXGBE_PRC1522		0x04070
+#define IXGBE_GPRC		0x04074
+#define IXGBE_BPRC		0x04078
+#define IXGBE_MPRC		0x0407C
+#define IXGBE_GPTC		0x04080
+#define IXGBE_GORCL		0x04088
+#define IXGBE_GORCH		0x0408C
+#define IXGBE_GOTCL		0x04090
+#define IXGBE_GOTCH		0x04094
+#define IXGBE_RNBC(_i)		(0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/
+#define IXGBE_RUC		0x040A4
+#define IXGBE_RFC		0x040A8
+#define IXGBE_ROC		0x040AC
+#define IXGBE_RJC		0x040B0
+#define IXGBE_MNGPRC		0x040B4
+#define IXGBE_MNGPDC		0x040B8
+#define IXGBE_MNGPTC		0x0CF90
+#define IXGBE_TORL		0x040C0
+#define IXGBE_TORH		0x040C4
+#define IXGBE_TPR		0x040D0
+#define IXGBE_TPT		0x040D4
+#define IXGBE_PTC64		0x040D8
+#define IXGBE_PTC127		0x040DC
+#define IXGBE_PTC255		0x040E0
+#define IXGBE_PTC511		0x040E4
+#define IXGBE_PTC1023		0x040E8
+#define IXGBE_PTC1522		0x040EC
+#define IXGBE_MPTC		0x040F0
+#define IXGBE_BPTC		0x040F4
+#define IXGBE_XEC		0x04120
+#define IXGBE_SSVPC		0x08780
+
+#define IXGBE_RQSMR(_i)	(0x02300 + ((_i) * 4))
+#define IXGBE_TQSMR(_i)	(((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \
+			 (0x08600 + ((_i) * 4)))
+#define IXGBE_TQSM(_i)	(0x08600 + ((_i) * 4))
+
+#define IXGBE_QPRC(_i)	(0x01030 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QPTC(_i)	(0x06030 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC(_i)	(0x01034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBTC(_i)	(0x06034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC_L(_i)	(0x01034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC_H(_i)	(0x01038 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QPRDC(_i)		(0x01430 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBTC_L(_i)	(0x08700 + ((_i) * 0x8)) /* 16 of these */
+#define IXGBE_QBTC_H(_i)	(0x08704 + ((_i) * 0x8)) /* 16 of these */
+#define IXGBE_FCCRC		0x05118 /* Num of Good Eth CRC w/ Bad FC CRC */
+#define IXGBE_FCOERPDC		0x0241C /* FCoE Rx Packets Dropped Count */
+#define IXGBE_FCLAST		0x02424 /* FCoE Last Error Count */
+#define IXGBE_FCOEPRC		0x02428 /* Number of FCoE Packets Received */
+#define IXGBE_FCOEDWRC		0x0242C /* Number of FCoE DWords Received */
+#define IXGBE_FCOEPTC		0x08784 /* Number of FCoE Packets Transmitted */
+#define IXGBE_FCOEDWTC		0x08788 /* Number of FCoE DWords Transmitted */
+#define IXGBE_FCCRC_CNT_MASK	0x0000FFFF /* CRC_CNT: bit 0 - 15 */
+#define IXGBE_FCLAST_CNT_MASK	0x0000FFFF /* Last_CNT: bit 0 - 15 */
+#define IXGBE_O2BGPTC		0x041C4
+#define IXGBE_O2BSPC		0x087B0
+#define IXGBE_B2OSPC		0x041C0
+#define IXGBE_B2OGPRC		0x02F90
+#define IXGBE_BUPRC		0x04180
+#define IXGBE_BMPRC		0x04184
+#define IXGBE_BBPRC		0x04188
+#define IXGBE_BUPTC		0x0418C
+#define IXGBE_BMPTC		0x04190
+#define IXGBE_BBPTC		0x04194
+#define IXGBE_BCRCERRS		0x04198
+#define IXGBE_BXONRXC		0x0419C
+#define IXGBE_BXOFFRXC		0x041E0
+#define IXGBE_BXONTXC		0x041E4
+#define IXGBE_BXOFFTXC		0x041E8
+#define IXGBE_PCRC8ECL		0x0E810
+#define IXGBE_PCRC8ECH		0x0E811
+#define IXGBE_PCRC8ECH_MASK	0x1F
+#define IXGBE_LDPCECL		0x0E820
+#define IXGBE_LDPCECH		0x0E821
+
+/* Management */
+#define IXGBE_MAVTV(_i)		(0x05010 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MFUTP(_i)		(0x05030 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MANC		0x05820
+#define IXGBE_MFVAL		0x05824
+#define IXGBE_MANC2H		0x05860
+#define IXGBE_MDEF(_i)		(0x05890 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MIPAF		0x058B0
+#define IXGBE_MMAL(_i)		(0x05910 + ((_i) * 8)) /* 4 of these (0-3) */
+#define IXGBE_MMAH(_i)		(0x05914 + ((_i) * 8)) /* 4 of these (0-3) */
+#define IXGBE_FTFT		0x09400 /* 0x9400-0x97FC */
+#define IXGBE_METF(_i)		(0x05190 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_MDEF_EXT(_i)	(0x05160 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_LSWFW		0x15014
+#define IXGBE_BMCIP(_i)		(0x05050 + ((_i) * 4)) /* 0x5050-0x505C */
+#define IXGBE_BMCIPVAL		0x05060
+#define IXGBE_BMCIP_IPADDR_TYPE	0x00000001
+#define IXGBE_BMCIP_IPADDR_VALID	0x00000002
+
+/* Management Bit Fields and Masks */
+#define IXGBE_MANC_EN_BMC2OS	0x10000000 /* Ena BMC2OS and OS2BMC traffic */
+#define IXGBE_MANC_EN_BMC2OS_SHIFT	28
+
+/* Firmware Semaphore Register */
+#define IXGBE_FWSM_MODE_MASK	0xE
+
+/* ARC Subsystem registers */
+#define IXGBE_HICR		0x15F00
+#define IXGBE_FWSTS		0x15F0C
+#define IXGBE_HSMC0R		0x15F04
+#define IXGBE_HSMC1R		0x15F08
+#define IXGBE_SWSR		0x15F10
+#define IXGBE_HFDR		0x15FE8
+#define IXGBE_FLEX_MNG		0x15800 /* 0x15800 - 0x15EFC */
+
+#define IXGBE_HICR_EN		0x01  /* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define IXGBE_HICR_C		0x02
+#define IXGBE_HICR_SV		0x04  /* Status Validity */
+#define IXGBE_HICR_FW_RESET_ENABLE	0x40
+#define IXGBE_HICR_FW_RESET	0x80
+
+/* PCI-E registers */
+#define IXGBE_GCR		0x11000
+#define IXGBE_GTV		0x11004
+#define IXGBE_FUNCTAG		0x11008
+#define IXGBE_GLT		0x1100C
+#define IXGBE_PCIEPIPEADR	0x11004
+#define IXGBE_PCIEPIPEDAT	0x11008
+#define IXGBE_GSCL_1		0x11010
+#define IXGBE_GSCL_2		0x11014
+#define IXGBE_GSCL_3		0x11018
+#define IXGBE_GSCL_4		0x1101C
+#define IXGBE_GSCN_0		0x11020
+#define IXGBE_GSCN_1		0x11024
+#define IXGBE_GSCN_2		0x11028
+#define IXGBE_GSCN_3		0x1102C
+#define IXGBE_FACTPS		0x10150
+#define IXGBE_PCIEANACTL	0x11040
+#define IXGBE_SWSM		0x10140
+#define IXGBE_FWSM		0x10148
+#define IXGBE_GSSR		0x10160
+#define IXGBE_MREVID		0x11064
+#define IXGBE_DCA_ID		0x11070
+#define IXGBE_DCA_CTRL		0x11074
+#define IXGBE_SWFW_SYNC		IXGBE_GSSR
+
+/* PCI-E registers 82599-Specific */
+#define IXGBE_GCR_EXT		0x11050
+#define IXGBE_GSCL_5_82599	0x11030
+#define IXGBE_GSCL_6_82599	0x11034
+#define IXGBE_GSCL_7_82599	0x11038
+#define IXGBE_GSCL_8_82599	0x1103C
+#define IXGBE_PHYADR_82599	0x11040
+#define IXGBE_PHYDAT_82599	0x11044
+#define IXGBE_PHYCTL_82599	0x11048
+#define IXGBE_PBACLR_82599	0x11068
+#define IXGBE_CIAA_82599	0x11088
+#define IXGBE_CIAD_82599	0x1108C
+#define IXGBE_PICAUSE		0x110B0
+#define IXGBE_PIENA		0x110B8
+#define IXGBE_CDQ_MBR_82599	0x110B4
+#define IXGBE_PCIESPARE		0x110BC
+#define IXGBE_MISC_REG_82599	0x110F0
+#define IXGBE_ECC_CTRL_0_82599	0x11100
+#define IXGBE_ECC_CTRL_1_82599	0x11104
+#define IXGBE_ECC_STATUS_82599	0x110E0
+#define IXGBE_BAR_CTRL_82599	0x110F4
+
+/* PCI Express Control */
+#define IXGBE_GCR_CMPL_TMOUT_MASK	0x0000F000
+#define IXGBE_GCR_CMPL_TMOUT_10ms	0x00001000
+#define IXGBE_GCR_CMPL_TMOUT_RESEND	0x00010000
+#define IXGBE_GCR_CAP_VER2		0x00040000
+
+#define IXGBE_GCR_EXT_MSIX_EN		0x80000000
+#define IXGBE_GCR_EXT_BUFFERS_CLEAR	0x40000000
+#define IXGBE_GCR_EXT_VT_MODE_16	0x00000001
+#define IXGBE_GCR_EXT_VT_MODE_32	0x00000002
+#define IXGBE_GCR_EXT_VT_MODE_64	0x00000003
+#define IXGBE_GCR_EXT_SRIOV		(IXGBE_GCR_EXT_MSIX_EN | \
+					 IXGBE_GCR_EXT_VT_MODE_64)
+/* Time Sync Registers */
+#define IXGBE_TSYNCRXCTL	0x05188 /* Rx Time Sync Control register - RW */
+#define IXGBE_TSYNCTXCTL	0x08C00 /* Tx Time Sync Control register - RW */
+#define IXGBE_RXSTMPL	0x051E8 /* Rx timestamp Low - RO */
+#define IXGBE_RXSTMPH	0x051A4 /* Rx timestamp High - RO */
+#define IXGBE_RXSATRL	0x051A0 /* Rx timestamp attribute low - RO */
+#define IXGBE_RXSATRH	0x051A8 /* Rx timestamp attribute high - RO */
+#define IXGBE_RXMTRL	0x05120 /* RX message type register low - RW */
+#define IXGBE_TXSTMPL	0x08C04 /* Tx timestamp value Low - RO */
+#define IXGBE_TXSTMPH	0x08C08 /* Tx timestamp value High - RO */
+#define IXGBE_SYSTIML	0x08C0C /* System time register Low - RO */
+#define IXGBE_SYSTIMH	0x08C10 /* System time register High - RO */
+#define IXGBE_TIMINCA	0x08C14 /* Increment attributes register - RW */
+#define IXGBE_TIMADJL	0x08C18 /* Time Adjustment Offset register Low - RW */
+#define IXGBE_TIMADJH	0x08C1C /* Time Adjustment Offset register High - RW */
+#define IXGBE_TSAUXC	0x08C20 /* TimeSync Auxiliary Control register - RW */
+#define IXGBE_TRGTTIML0	0x08C24 /* Target Time Register 0 Low - RW */
+#define IXGBE_TRGTTIMH0	0x08C28 /* Target Time Register 0 High - RW */
+#define IXGBE_TRGTTIML1	0x08C2C /* Target Time Register 1 Low - RW */
+#define IXGBE_TRGTTIMH1	0x08C30 /* Target Time Register 1 High - RW */
+#define IXGBE_FREQOUT0	0x08C34 /* Frequency Out 0 Control register - RW */
+#define IXGBE_FREQOUT1	0x08C38 /* Frequency Out 1 Control register - RW */
+#define IXGBE_AUXSTMPL0	0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */
+#define IXGBE_AUXSTMPH0	0x08C40 /* Auxiliary Time Stamp 0 register High - RO */
+#define IXGBE_AUXSTMPL1	0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
+#define IXGBE_AUXSTMPH1	0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
+
+/* Diagnostic Registers */
+#define IXGBE_RDSTATCTL		0x02C20
+#define IXGBE_RDSTAT(_i)	(0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */
+#define IXGBE_RDHMPN		0x02F08
+#define IXGBE_RIC_DW(_i)	(0x02F10 + ((_i) * 4))
+#define IXGBE_RDPROBE		0x02F20
+#define IXGBE_RDMAM		0x02F30
+#define IXGBE_RDMAD		0x02F34
+#define IXGBE_TDSTATCTL		0x07C20
+#define IXGBE_TDSTAT(_i)	(0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */
+#define IXGBE_TDHMPN		0x07F08
+#define IXGBE_TDHMPN2		0x082FC
+#define IXGBE_TXDESCIC		0x082CC
+#define IXGBE_TIC_DW(_i)	(0x07F10 + ((_i) * 4))
+#define IXGBE_TIC_DW2(_i)	(0x082B0 + ((_i) * 4))
+#define IXGBE_TDPROBE		0x07F20
+#define IXGBE_TXBUFCTRL		0x0C600
+#define IXGBE_TXBUFDATA0	0x0C610
+#define IXGBE_TXBUFDATA1	0x0C614
+#define IXGBE_TXBUFDATA2	0x0C618
+#define IXGBE_TXBUFDATA3	0x0C61C
+#define IXGBE_RXBUFCTRL		0x03600
+#define IXGBE_RXBUFDATA0	0x03610
+#define IXGBE_RXBUFDATA1	0x03614
+#define IXGBE_RXBUFDATA2	0x03618
+#define IXGBE_RXBUFDATA3	0x0361C
+#define IXGBE_PCIE_DIAG(_i)	(0x11090 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_RFVAL		0x050A4
+#define IXGBE_MDFTC1		0x042B8
+#define IXGBE_MDFTC2		0x042C0
+#define IXGBE_MDFTFIFO1		0x042C4
+#define IXGBE_MDFTFIFO2		0x042C8
+#define IXGBE_MDFTS		0x042CC
+#define IXGBE_RXDATAWRPTR(_i)	(0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/
+#define IXGBE_RXDESCWRPTR(_i)	(0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/
+#define IXGBE_RXDATARDPTR(_i)	(0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/
+#define IXGBE_RXDESCRDPTR(_i)	(0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/
+#define IXGBE_TXDATAWRPTR(_i)	(0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/
+#define IXGBE_TXDESCWRPTR(_i)	(0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/
+#define IXGBE_TXDATARDPTR(_i)	(0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/
+#define IXGBE_TXDESCRDPTR(_i)	(0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/
+#define IXGBE_PCIEECCCTL	0x1106C
+#define IXGBE_RXWRPTR(_i)	(0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/
+#define IXGBE_RXUSED(_i)	(0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/
+#define IXGBE_RXRDPTR(_i)	(0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/
+#define IXGBE_RXRDWRPTR(_i)	(0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/
+#define IXGBE_TXWRPTR(_i)	(0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/
+#define IXGBE_TXUSED(_i)	(0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/
+#define IXGBE_TXRDPTR(_i)	(0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/
+#define IXGBE_TXRDWRPTR(_i)	(0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/
+#define IXGBE_PCIEECCCTL0	0x11100
+#define IXGBE_PCIEECCCTL1	0x11104
+#define IXGBE_RXDBUECC		0x03F70
+#define IXGBE_TXDBUECC		0x0CF70
+#define IXGBE_RXDBUEST		0x03F74
+#define IXGBE_TXDBUEST		0x0CF74
+#define IXGBE_PBTXECC		0x0C300
+#define IXGBE_PBRXECC		0x03300
+#define IXGBE_GHECCR		0x110B0
+
+/* MAC Registers */
+#define IXGBE_PCS1GCFIG		0x04200
+#define IXGBE_PCS1GLCTL		0x04208
+#define IXGBE_PCS1GLSTA		0x0420C
+#define IXGBE_PCS1GDBG0		0x04210
+#define IXGBE_PCS1GDBG1		0x04214
+#define IXGBE_PCS1GANA		0x04218
+#define IXGBE_PCS1GANLP		0x0421C
+#define IXGBE_PCS1GANNP		0x04220
+#define IXGBE_PCS1GANLPNP	0x04224
+#define IXGBE_HLREG0		0x04240
+#define IXGBE_HLREG1		0x04244
+#define IXGBE_PAP		0x04248
+#define IXGBE_MACA		0x0424C
+#define IXGBE_APAE		0x04250
+#define IXGBE_ARD		0x04254
+#define IXGBE_AIS		0x04258
+#define IXGBE_MSCA		0x0425C
+#define IXGBE_MSRWD		0x04260
+#define IXGBE_MLADD		0x04264
+#define IXGBE_MHADD		0x04268
+#define IXGBE_MAXFRS		0x04268
+#define IXGBE_TREG		0x0426C
+#define IXGBE_PCSS1		0x04288
+#define IXGBE_PCSS2		0x0428C
+#define IXGBE_XPCSS		0x04290
+#define IXGBE_MFLCN		0x04294
+#define IXGBE_SERDESC		0x04298
+#define IXGBE_MACS		0x0429C
+#define IXGBE_AUTOC		0x042A0
+#define IXGBE_LINKS		0x042A4
+#define IXGBE_LINKS2		0x04324
+#define IXGBE_AUTOC2		0x042A8
+#define IXGBE_AUTOC3		0x042AC
+#define IXGBE_ANLP1		0x042B0
+#define IXGBE_ANLP2		0x042B4
+#define IXGBE_MACC		0x04330
+#define IXGBE_ATLASCTL		0x04800
+#define IXGBE_MMNGC		0x042D0
+#define IXGBE_ANLPNP1		0x042D4
+#define IXGBE_ANLPNP2		0x042D8
+#define IXGBE_KRPCSFC		0x042E0
+#define IXGBE_KRPCSS		0x042E4
+#define IXGBE_FECS1		0x042E8
+#define IXGBE_FECS2		0x042EC
+#define IXGBE_SMADARCTL		0x14F10
+#define IXGBE_MPVC		0x04318
+#define IXGBE_SGMIIC		0x04314
+
+/* Statistics Registers */
+#define IXGBE_RXNFGPC		0x041B0
+#define IXGBE_RXNFGBCL		0x041B4
+#define IXGBE_RXNFGBCH		0x041B8
+#define IXGBE_RXDGPC		0x02F50
+#define IXGBE_RXDGBCL		0x02F54
+#define IXGBE_RXDGBCH		0x02F58
+#define IXGBE_RXDDGPC		0x02F5C
+#define IXGBE_RXDDGBCL		0x02F60
+#define IXGBE_RXDDGBCH		0x02F64
+#define IXGBE_RXLPBKGPC		0x02F68
+#define IXGBE_RXLPBKGBCL	0x02F6C
+#define IXGBE_RXLPBKGBCH	0x02F70
+#define IXGBE_RXDLPBKGPC	0x02F74
+#define IXGBE_RXDLPBKGBCL	0x02F78
+#define IXGBE_RXDLPBKGBCH	0x02F7C
+#define IXGBE_TXDGPC		0x087A0
+#define IXGBE_TXDGBCL		0x087A4
+#define IXGBE_TXDGBCH		0x087A8
+
+#define IXGBE_RXDSTATCTRL	0x02F40
+
+/* Copper Pond 2 link timeout */
+#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50
+
+/* Omer CORECTL */
+#define IXGBE_CORECTL			0x014F00
+/* BARCTRL */
+#define IXGBE_BARCTRL			0x110F4
+#define IXGBE_BARCTRL_FLSIZE		0x0700
+#define IXGBE_BARCTRL_FLSIZE_SHIFT	8
+#define IXGBE_BARCTRL_CSRSIZE		0x2000
+
+/* RSCCTL Bit Masks */
+#define IXGBE_RSCCTL_RSCEN	0x01
+#define IXGBE_RSCCTL_MAXDESC_1	0x00
+#define IXGBE_RSCCTL_MAXDESC_4	0x04
+#define IXGBE_RSCCTL_MAXDESC_8	0x08
+#define IXGBE_RSCCTL_MAXDESC_16	0x0C
+
+/* RSCDBU Bit Masks */
+#define IXGBE_RSCDBU_RSCSMALDIS_MASK	0x0000007F
+#define IXGBE_RSCDBU_RSCACKDIS		0x00000080
+
+/* RDRXCTL Bit Masks */
+#define IXGBE_RDRXCTL_RDMTS_1_2		0x00000000 /* Rx Desc Min THLD Size */
+#define IXGBE_RDRXCTL_CRCSTRIP		0x00000002 /* CRC Strip */
+#define IXGBE_RDRXCTL_MVMEN		0x00000020
+#define IXGBE_RDRXCTL_DMAIDONE		0x00000008 /* DMA init cycle done */
+#define IXGBE_RDRXCTL_AGGDIS		0x00010000 /* Aggregation disable */
+#define IXGBE_RDRXCTL_RSCFRSTSIZE	0x003E0000 /* RSC First packet size */
+#define IXGBE_RDRXCTL_RSCLLIDIS		0x00800000 /* Disabl RSC compl on LLI */
+#define IXGBE_RDRXCTL_RSCACKC		0x02000000 /* must set 1 when RSC ena */
+#define IXGBE_RDRXCTL_FCOE_WRFIX	0x04000000 /* must set 1 when RSC ena */
+
+/* RQTC Bit Masks and Shifts */
+#define IXGBE_RQTC_SHIFT_TC(_i)	((_i) * 4)
+#define IXGBE_RQTC_TC0_MASK	(0x7 << 0)
+#define IXGBE_RQTC_TC1_MASK	(0x7 << 4)
+#define IXGBE_RQTC_TC2_MASK	(0x7 << 8)
+#define IXGBE_RQTC_TC3_MASK	(0x7 << 12)
+#define IXGBE_RQTC_TC4_MASK	(0x7 << 16)
+#define IXGBE_RQTC_TC5_MASK	(0x7 << 20)
+#define IXGBE_RQTC_TC6_MASK	(0x7 << 24)
+#define IXGBE_RQTC_TC7_MASK	(0x7 << 28)
+
+/* PSRTYPE.RQPL Bit masks and shift */
+#define IXGBE_PSRTYPE_RQPL_MASK		0x7
+#define IXGBE_PSRTYPE_RQPL_SHIFT	29
+
+/* CTRL Bit Masks */
+#define IXGBE_CTRL_GIO_DIS	0x00000004 /* Global IO Master Disable bit */
+#define IXGBE_CTRL_LNK_RST	0x00000008 /* Link Reset. Resets everything. */
+#define IXGBE_CTRL_RST		0x04000000 /* Reset (SW) */
+#define IXGBE_CTRL_RST_MASK	(IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST)
+
+/* FACTPS */
+#define IXGBE_FACTPS_LFS	0x40000000 /* LAN Function Select */
+
+/* MHADD Bit Masks */
+#define IXGBE_MHADD_MFS_MASK	0xFFFF0000
+#define IXGBE_MHADD_MFS_SHIFT	16
+
+/* Extended Device Control */
+#define IXGBE_CTRL_EXT_PFRSTD	0x00004000 /* Physical Function Reset Done */
+#define IXGBE_CTRL_EXT_NS_DIS	0x00010000 /* No Snoop disable */
+#define IXGBE_CTRL_EXT_RO_DIS	0x00020000 /* Relaxed Ordering disable */
+#define IXGBE_CTRL_EXT_DRV_LOAD	0x10000000 /* Driver loaded bit for FW */
+
+/* Direct Cache Access (DCA) definitions */
+#define IXGBE_DCA_CTRL_DCA_ENABLE	0x00000000 /* DCA Enable */
+#define IXGBE_DCA_CTRL_DCA_DISABLE	0x00000001 /* DCA Disable */
+
+#define IXGBE_DCA_CTRL_DCA_MODE_CB1	0x00 /* DCA Mode CB1 */
+#define IXGBE_DCA_CTRL_DCA_MODE_CB2	0x02 /* DCA Mode CB2 */
+
+#define IXGBE_DCA_RXCTRL_CPUID_MASK	0x0000001F /* Rx CPUID Mask */
+#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599	0xFF000000 /* Rx CPUID Mask */
+#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599	24 /* Rx CPUID Shift */
+#define IXGBE_DCA_RXCTRL_DESC_DCA_EN	(1 << 5) /* Rx Desc enable */
+#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN	(1 << 6) /* Rx Desc header ena */
+#define IXGBE_DCA_RXCTRL_DATA_DCA_EN	(1 << 7) /* Rx Desc payload ena */
+#define IXGBE_DCA_RXCTRL_DESC_RRO_EN	(1 << 9) /* Rx rd Desc Relax Order */
+#define IXGBE_DCA_RXCTRL_DATA_WRO_EN	(1 << 13) /* Rx wr data Relax Order */
+#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN	(1 << 15) /* Rx wr header RO */
+
+#define IXGBE_DCA_TXCTRL_CPUID_MASK	0x0000001F /* Tx CPUID Mask */
+#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599	0xFF000000 /* Tx CPUID Mask */
+#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599	24 /* Tx CPUID Shift */
+#define IXGBE_DCA_TXCTRL_DESC_DCA_EN	(1 << 5) /* DCA Tx Desc enable */
+#define IXGBE_DCA_TXCTRL_DESC_RRO_EN	(1 << 9) /* Tx rd Desc Relax Order */
+#define IXGBE_DCA_TXCTRL_DESC_WRO_EN	(1 << 11) /* Tx Desc writeback RO bit */
+#define IXGBE_DCA_TXCTRL_DATA_RRO_EN	(1 << 13) /* Tx rd data Relax Order */
+#define IXGBE_DCA_MAX_QUEUES_82598	16 /* DCA regs only on 16 queues */
+
+/* MSCA Bit Masks */
+#define IXGBE_MSCA_NP_ADDR_MASK		0x0000FFFF /* MDI Addr (new prot) */
+#define IXGBE_MSCA_NP_ADDR_SHIFT	0
+#define IXGBE_MSCA_DEV_TYPE_MASK	0x001F0000 /* Dev Type (new prot) */
+#define IXGBE_MSCA_DEV_TYPE_SHIFT	16 /* Register Address (old prot */
+#define IXGBE_MSCA_PHY_ADDR_MASK	0x03E00000 /* PHY Address mask */
+#define IXGBE_MSCA_PHY_ADDR_SHIFT	21 /* PHY Address shift*/
+#define IXGBE_MSCA_OP_CODE_MASK		0x0C000000 /* OP CODE mask */
+#define IXGBE_MSCA_OP_CODE_SHIFT	26 /* OP CODE shift */
+#define IXGBE_MSCA_ADDR_CYCLE		0x00000000 /* OP CODE 00 (addr cycle) */
+#define IXGBE_MSCA_WRITE		0x04000000 /* OP CODE 01 (wr) */
+#define IXGBE_MSCA_READ			0x0C000000 /* OP CODE 11 (rd) */
+#define IXGBE_MSCA_READ_AUTOINC		0x08000000 /* OP CODE 10 (rd auto inc)*/
+#define IXGBE_MSCA_ST_CODE_MASK		0x30000000 /* ST Code mask */
+#define IXGBE_MSCA_ST_CODE_SHIFT	28 /* ST Code shift */
+#define IXGBE_MSCA_NEW_PROTOCOL		0x00000000 /* ST CODE 00 (new prot) */
+#define IXGBE_MSCA_OLD_PROTOCOL		0x10000000 /* ST CODE 01 (old prot) */
+#define IXGBE_MSCA_MDI_COMMAND		0x40000000 /* Initiate MDI command */
+#define IXGBE_MSCA_MDI_IN_PROG_EN	0x80000000 /* MDI in progress ena */
+
+/* MSRWD bit masks */
+#define IXGBE_MSRWD_WRITE_DATA_MASK	0x0000FFFF
+#define IXGBE_MSRWD_WRITE_DATA_SHIFT	0
+#define IXGBE_MSRWD_READ_DATA_MASK	0xFFFF0000
+#define IXGBE_MSRWD_READ_DATA_SHIFT	16
+
+/* Atlas registers */
+#define IXGBE_ATLAS_PDN_LPBK		0x24
+#define IXGBE_ATLAS_PDN_10G		0xB
+#define IXGBE_ATLAS_PDN_1G		0xC
+#define IXGBE_ATLAS_PDN_AN		0xD
+
+/* Atlas bit masks */
+#define IXGBE_ATLASCTL_WRITE_CMD	0x00010000
+#define IXGBE_ATLAS_PDN_TX_REG_EN	0x10
+#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL	0xF0
+#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL	0xF0
+#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL	0xF0
+
+/* Omer bit masks */
+#define IXGBE_CORECTL_WRITE_CMD		0x00010000
+
+/* Device Type definitions for new protocol MDIO commands */
+#define IXGBE_MDIO_PMA_PMD_DEV_TYPE		0x1
+#define IXGBE_MDIO_PCS_DEV_TYPE			0x3
+#define IXGBE_MDIO_PHY_XS_DEV_TYPE		0x4
+#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE		0x7
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE	0x1E   /* Device 30 */
+#define IXGBE_TWINAX_DEV			1
+
+#define IXGBE_MDIO_COMMAND_TIMEOUT	100 /* PHY Timeout for 1 GB mode */
+
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL		0x0 /* VS1 Ctrl Reg */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS		0x1 /* VS1 Status Reg */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS	0x0008 /* 1 = Link Up */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS	0x0010 /* 0-10G, 1-1G */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED		0x0018
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED		0x0010
+
+#define IXGBE_MDIO_AUTO_NEG_CONTROL	0x0 /* AUTO_NEG Control Reg */
+#define IXGBE_MDIO_AUTO_NEG_STATUS	0x1 /* AUTO_NEG Status Reg */
+#define IXGBE_MDIO_AUTO_NEG_ADVT	0x10 /* AUTO_NEG Advt Reg */
+#define IXGBE_MDIO_AUTO_NEG_LP		0x13 /* AUTO_NEG LP Status Reg */
+#define IXGBE_MDIO_PHY_XS_CONTROL	0x0 /* PHY_XS Control Reg */
+#define IXGBE_MDIO_PHY_XS_RESET		0x8000 /* PHY_XS Reset */
+#define IXGBE_MDIO_PHY_ID_HIGH		0x2 /* PHY ID High Reg*/
+#define IXGBE_MDIO_PHY_ID_LOW		0x3 /* PHY ID Low Reg*/
+#define IXGBE_MDIO_PHY_SPEED_ABILITY	0x4 /* Speed Ability Reg */
+#define IXGBE_MDIO_PHY_SPEED_10G	0x0001 /* 10G capable */
+#define IXGBE_MDIO_PHY_SPEED_1G		0x0010 /* 1G capable */
+#define IXGBE_MDIO_PHY_SPEED_100M	0x0020 /* 100M capable */
+#define IXGBE_MDIO_PHY_EXT_ABILITY	0xB /* Ext Ability Reg */
+#define IXGBE_MDIO_PHY_10GBASET_ABILITY		0x0004 /* 10GBaseT capable */
+#define IXGBE_MDIO_PHY_1000BASET_ABILITY	0x0020 /* 1000BaseT capable */
+#define IXGBE_MDIO_PHY_100BASETX_ABILITY	0x0080 /* 100BaseTX capable */
+#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE	0x0800 /* Set low power mode */
+
+#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR	0x0000 /* PMA/PMD Control Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR	0xC30A /* PHY_XS SDA/SCL Addr Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA	0xC30B /* PHY_XS SDA/SCL Data Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT	0xC30C /* PHY_XS SDA/SCL Status Reg */
+
+/* MII clause 22/28 definitions */
+#define IXGBE_MDIO_PHY_LOW_POWER_MODE	0x0800
+
+#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG	0x20   /* 10G Control Reg */
+#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */
+#define IXGBE_MII_AUTONEG_XNP_TX_REG		0x17   /* 1G XNP Transmit */
+#define IXGBE_MII_AUTONEG_ADVERTISE_REG		0x10   /* 100M Advertisement */
+#define IXGBE_MII_10GBASE_T_ADVERTISE		0x1000 /* full duplex, bit:12*/
+#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX	0x4000 /* full duplex, bit:14*/
+#define IXGBE_MII_1GBASE_T_ADVERTISE		0x8000 /* full duplex, bit:15*/
+#define IXGBE_MII_100BASE_T_ADVERTISE		0x0100 /* full duplex, bit:8 */
+#define IXGBE_MII_100BASE_T_ADVERTISE_HALF	0x0080 /* half duplex, bit:7 */
+#define IXGBE_MII_RESTART			0x200
+#define IXGBE_MII_AUTONEG_COMPLETE		0x20
+#define IXGBE_MII_AUTONEG_LINK_UP		0x04
+#define IXGBE_MII_AUTONEG_REG			0x0
+
+#define IXGBE_PHY_REVISION_MASK		0xFFFFFFF0
+#define IXGBE_MAX_PHY_ADDR		32
+
+/* PHY IDs*/
+#define TN1010_PHY_ID	0x00A19410
+#define TNX_FW_REV	0xB
+#define X540_PHY_ID	0x01540200
+#define AQ_FW_REV	0x20
+#define QT2022_PHY_ID	0x0043A400
+#define ATH_PHY_ID	0x03429050
+
+/* PHY Types */
+#define IXGBE_M88E1145_E_PHY_ID	0x01410CD0
+
+/* Special PHY Init Routine */
+#define IXGBE_PHY_INIT_OFFSET_NL	0x002B
+#define IXGBE_PHY_INIT_END_NL		0xFFFF
+#define IXGBE_CONTROL_MASK_NL		0xF000
+#define IXGBE_DATA_MASK_NL		0x0FFF
+#define IXGBE_CONTROL_SHIFT_NL		12
+#define IXGBE_DELAY_NL			0
+#define IXGBE_DATA_NL			1
+#define IXGBE_CONTROL_NL		0x000F
+#define IXGBE_CONTROL_EOL_NL		0x0FFF
+#define IXGBE_CONTROL_SOL_NL		0x0000
+
+/* General purpose Interrupt Enable */
+#define IXGBE_SDP0_GPIEN	0x00000001 /* SDP0 */
+#define IXGBE_SDP1_GPIEN	0x00000002 /* SDP1 */
+#define IXGBE_SDP2_GPIEN	0x00000004 /* SDP2 */
+#define IXGBE_GPIE_MSIX_MODE	0x00000010 /* MSI-X mode */
+#define IXGBE_GPIE_OCD		0x00000020 /* Other Clear Disable */
+#define IXGBE_GPIE_EIMEN	0x00000040 /* Immediate Interrupt Enable */
+#define IXGBE_GPIE_EIAME	0x40000000
+#define IXGBE_GPIE_PBA_SUPPORT	0x80000000
+#define IXGBE_GPIE_RSC_DELAY_SHIFT	11
+#define IXGBE_GPIE_VTMODE_MASK	0x0000C000 /* VT Mode Mask */
+#define IXGBE_GPIE_VTMODE_16	0x00004000 /* 16 VFs 8 queues per VF */
+#define IXGBE_GPIE_VTMODE_32	0x00008000 /* 32 VFs 4 queues per VF */
+#define IXGBE_GPIE_VTMODE_64	0x0000C000 /* 64 VFs 2 queues per VF */
+
+/* Packet Buffer Initialization */
+#define IXGBE_MAX_PACKET_BUFFERS	8
+
+#define IXGBE_TXPBSIZE_20KB	0x00005000 /* 20KB Packet Buffer */
+#define IXGBE_TXPBSIZE_40KB	0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB	0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB	0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB	0x00014000 /* 80KB Packet Buffer */
+#define IXGBE_RXPBSIZE_128KB	0x00020000 /* 128KB Packet Buffer */
+#define IXGBE_RXPBSIZE_MAX	0x00080000 /* 512KB Packet Buffer */
+#define IXGBE_TXPBSIZE_MAX	0x00028000 /* 160KB Packet Buffer */
+
+#define IXGBE_TXPKT_SIZE_MAX	0xA /* Max Tx Packet size */
+#define IXGBE_MAX_PB		8
+
+/* Packet buffer allocation strategies */
+enum {
+	PBA_STRATEGY_EQUAL	= 0, /* Distribute PB space equally */
+#define PBA_STRATEGY_EQUAL	PBA_STRATEGY_EQUAL
+	PBA_STRATEGY_WEIGHTED	= 1, /* Weight front half of TCs */
+#define PBA_STRATEGY_WEIGHTED	PBA_STRATEGY_WEIGHTED
+};
+
+/* Transmit Flow Control status */
+#define IXGBE_TFCS_TXOFF	0x00000001
+#define IXGBE_TFCS_TXOFF0	0x00000100
+#define IXGBE_TFCS_TXOFF1	0x00000200
+#define IXGBE_TFCS_TXOFF2	0x00000400
+#define IXGBE_TFCS_TXOFF3	0x00000800
+#define IXGBE_TFCS_TXOFF4	0x00001000
+#define IXGBE_TFCS_TXOFF5	0x00002000
+#define IXGBE_TFCS_TXOFF6	0x00004000
+#define IXGBE_TFCS_TXOFF7	0x00008000
+
+/* TCP Timer */
+#define IXGBE_TCPTIMER_KS		0x00000100
+#define IXGBE_TCPTIMER_COUNT_ENABLE	0x00000200
+#define IXGBE_TCPTIMER_COUNT_FINISH	0x00000400
+#define IXGBE_TCPTIMER_LOOP		0x00000800
+#define IXGBE_TCPTIMER_DURATION_MASK	0x000000FF
+
+/* HLREG0 Bit Masks */
+#define IXGBE_HLREG0_TXCRCEN		0x00000001 /* bit  0 */
+#define IXGBE_HLREG0_RXCRCSTRP		0x00000002 /* bit  1 */
+#define IXGBE_HLREG0_JUMBOEN		0x00000004 /* bit  2 */
+#define IXGBE_HLREG0_TXPADEN		0x00000400 /* bit 10 */
+#define IXGBE_HLREG0_TXPAUSEEN		0x00001000 /* bit 12 */
+#define IXGBE_HLREG0_RXPAUSEEN		0x00004000 /* bit 14 */
+#define IXGBE_HLREG0_LPBK		0x00008000 /* bit 15 */
+#define IXGBE_HLREG0_MDCSPD		0x00010000 /* bit 16 */
+#define IXGBE_HLREG0_CONTMDC		0x00020000 /* bit 17 */
+#define IXGBE_HLREG0_CTRLFLTR		0x00040000 /* bit 18 */
+#define IXGBE_HLREG0_PREPEND		0x00F00000 /* bits 20-23 */
+#define IXGBE_HLREG0_PRIPAUSEEN		0x01000000 /* bit 24 */
+#define IXGBE_HLREG0_RXPAUSERECDA	0x06000000 /* bits 25-26 */
+#define IXGBE_HLREG0_RXLNGTHERREN	0x08000000 /* bit 27 */
+#define IXGBE_HLREG0_RXPADSTRIPEN	0x10000000 /* bit 28 */
+
+/* VMD_CTL bitmasks */
+#define IXGBE_VMD_CTL_VMDQ_EN		0x00000001
+#define IXGBE_VMD_CTL_VMDQ_FILTER	0x00000002
+
+/* VT_CTL bitmasks */
+#define IXGBE_VT_CTL_DIS_DEFPL		0x20000000 /* disable default pool */
+#define IXGBE_VT_CTL_REPLEN		0x40000000 /* replication enabled */
+#define IXGBE_VT_CTL_VT_ENABLE		0x00000001  /* Enable VT Mode */
+#define IXGBE_VT_CTL_POOL_SHIFT		7
+#define IXGBE_VT_CTL_POOL_MASK		(0x3F << IXGBE_VT_CTL_POOL_SHIFT)
+
+/* VMOLR bitmasks */
+#define IXGBE_VMOLR_AUPE	0x01000000 /* accept untagged packets */
+#define IXGBE_VMOLR_ROMPE	0x02000000 /* accept packets in MTA tbl */
+#define IXGBE_VMOLR_ROPE	0x04000000 /* accept packets in UC tbl */
+#define IXGBE_VMOLR_BAM		0x08000000 /* accept broadcast packets */
+#define IXGBE_VMOLR_MPE		0x10000000 /* multicast promiscuous */
+
+/* VFRE bitmask */
+#define IXGBE_VFRE_ENABLE_ALL	0xFFFFFFFF
+
+#define IXGBE_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
+
+/* RDHMPN and TDHMPN bitmasks */
+#define IXGBE_RDHMPN_RDICADDR		0x007FF800
+#define IXGBE_RDHMPN_RDICRDREQ		0x00800000
+#define IXGBE_RDHMPN_RDICADDR_SHIFT	11
+#define IXGBE_TDHMPN_TDICADDR		0x003FF800
+#define IXGBE_TDHMPN_TDICRDREQ		0x00800000
+#define IXGBE_TDHMPN_TDICADDR_SHIFT	11
+
+#define IXGBE_RDMAM_MEM_SEL_SHIFT		13
+#define IXGBE_RDMAM_DWORD_SHIFT			9
+#define IXGBE_RDMAM_DESC_COMP_FIFO		1
+#define IXGBE_RDMAM_DFC_CMD_FIFO		2
+#define IXGBE_RDMAM_RSC_HEADER_ADDR		3
+#define IXGBE_RDMAM_TCN_STATUS_RAM		4
+#define IXGBE_RDMAM_WB_COLL_FIFO		5
+#define IXGBE_RDMAM_QSC_CNT_RAM			6
+#define IXGBE_RDMAM_QSC_FCOE_RAM		7
+#define IXGBE_RDMAM_QSC_QUEUE_CNT		8
+#define IXGBE_RDMAM_QSC_QUEUE_RAM		0xA
+#define IXGBE_RDMAM_QSC_RSC_RAM			0xB
+#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE		135
+#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT		4
+#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE		48
+#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT		7
+#define IXGBE_RDMAM_RSC_HEADER_ADDR_RANGE	32
+#define IXGBE_RDMAM_RSC_HEADER_ADDR_COUNT	4
+#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE	256
+#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT	9
+#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE		8
+#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT		4
+#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE		64
+#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT		4
+#define IXGBE_RDMAM_QSC_FCOE_RAM_RANGE		512
+#define IXGBE_RDMAM_QSC_FCOE_RAM_COUNT		5
+#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE		32
+#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT		4
+#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE		128
+#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT		8
+#define IXGBE_RDMAM_QSC_RSC_RAM_RANGE		32
+#define IXGBE_RDMAM_QSC_RSC_RAM_COUNT		8
+
+#define IXGBE_TXDESCIC_READY	0x80000000
+
+/* Receive Checksum Control */
+#define IXGBE_RXCSUM_IPPCSE	0x00001000 /* IP payload checksum enable */
+#define IXGBE_RXCSUM_PCSD	0x00002000 /* packet checksum disabled */
+
+/* FCRTL Bit Masks */
+#define IXGBE_FCRTL_XONE	0x80000000 /* XON enable */
+#define IXGBE_FCRTH_FCEN	0x80000000 /* Packet buffer fc enable */
+
+/* PAP bit masks*/
+#define IXGBE_PAP_TXPAUSECNT_MASK	0x0000FFFF /* Pause counter mask */
+
+/* RMCS Bit Masks */
+#define IXGBE_RMCS_RRM			0x00000002 /* Rx Recycle Mode enable */
+/* Receive Arbitration Control: 0 Round Robin, 1 DFP */
+#define IXGBE_RMCS_RAC			0x00000004
+/* Deficit Fixed Prio ena */
+#define IXGBE_RMCS_DFP			IXGBE_RMCS_RAC
+#define IXGBE_RMCS_TFCE_802_3X		0x00000008 /* Tx Priority FC ena */
+#define IXGBE_RMCS_TFCE_PRIORITY	0x00000010 /* Tx Priority FC ena */
+#define IXGBE_RMCS_ARBDIS		0x00000040 /* Arbitration disable bit */
+
+/* FCCFG Bit Masks */
+#define IXGBE_FCCFG_TFCE_802_3X		0x00000008 /* Tx link FC enable */
+#define IXGBE_FCCFG_TFCE_PRIORITY	0x00000010 /* Tx priority FC enable */
+
+/* Interrupt register bitmasks */
+
+/* Extended Interrupt Cause Read */
+#define IXGBE_EICR_RTX_QUEUE	0x0000FFFF /* RTx Queue Interrupt */
+#define IXGBE_EICR_FLOW_DIR	0x00010000 /* FDir Exception */
+#define IXGBE_EICR_RX_MISS	0x00020000 /* Packet Buffer Overrun */
+#define IXGBE_EICR_PCI		0x00040000 /* PCI Exception */
+#define IXGBE_EICR_MAILBOX	0x00080000 /* VF to PF Mailbox Interrupt */
+#define IXGBE_EICR_LSC		0x00100000 /* Link Status Change */
+#define IXGBE_EICR_LINKSEC	0x00200000 /* PN Threshold */
+#define IXGBE_EICR_MNG		0x00400000 /* Manageability Event Interrupt */
+#define IXGBE_EICR_TS		0x00800000 /* Thermal Sensor Event */
+#define IXGBE_EICR_GPI_SDP0	0x01000000 /* Gen Purpose Interrupt on SDP0 */
+#define IXGBE_EICR_GPI_SDP1	0x02000000 /* Gen Purpose Interrupt on SDP1 */
+#define IXGBE_EICR_GPI_SDP2	0x04000000 /* Gen Purpose Interrupt on SDP2 */
+#define IXGBE_EICR_ECC		0x10000000 /* ECC Error */
+#define IXGBE_EICR_PBUR		0x10000000 /* Packet Buffer Handler Error */
+#define IXGBE_EICR_DHER		0x20000000 /* Descriptor Handler Error */
+#define IXGBE_EICR_TCP_TIMER	0x40000000 /* TCP Timer */
+#define IXGBE_EICR_OTHER	0x80000000 /* Interrupt Cause Active */
+
+/* Extended Interrupt Cause Set */
+#define IXGBE_EICS_RTX_QUEUE	IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EICS_FLOW_DIR	IXGBE_EICR_FLOW_DIR  /* FDir Exception */
+#define IXGBE_EICS_RX_MISS	IXGBE_EICR_RX_MISS   /* Pkt Buffer Overrun */
+#define IXGBE_EICS_PCI		IXGBE_EICR_PCI /* PCI Exception */
+#define IXGBE_EICS_MAILBOX	IXGBE_EICR_MAILBOX   /* VF to PF Mailbox Int */
+#define IXGBE_EICS_LSC		IXGBE_EICR_LSC /* Link Status Change */
+#define IXGBE_EICS_MNG		IXGBE_EICR_MNG /* MNG Event Interrupt */
+#define IXGBE_EICS_GPI_SDP0	IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
+#define IXGBE_EICS_GPI_SDP1	IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
+#define IXGBE_EICS_GPI_SDP2	IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
+#define IXGBE_EICS_ECC		IXGBE_EICR_ECC /* ECC Error */
+#define IXGBE_EICS_PBUR		IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
+#define IXGBE_EICS_DHER		IXGBE_EICR_DHER /* Desc Handler Error */
+#define IXGBE_EICS_TCP_TIMER	IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EICS_OTHER	IXGBE_EICR_OTHER /* INT Cause Active */
+
+/* Extended Interrupt Mask Set */
+#define IXGBE_EIMS_RTX_QUEUE	IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EIMS_FLOW_DIR	IXGBE_EICR_FLOW_DIR /* FDir Exception */
+#define IXGBE_EIMS_RX_MISS	IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */
+#define IXGBE_EIMS_PCI		IXGBE_EICR_PCI /* PCI Exception */
+#define IXGBE_EIMS_MAILBOX	IXGBE_EICR_MAILBOX   /* VF to PF Mailbox Int */
+#define IXGBE_EIMS_LSC		IXGBE_EICR_LSC /* Link Status Change */
+#define IXGBE_EIMS_MNG		IXGBE_EICR_MNG /* MNG Event Interrupt */
+#define IXGBE_EIMS_TS		IXGBE_EICR_TS /* Thermal Sensor Event */
+#define IXGBE_EIMS_GPI_SDP0	IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
+#define IXGBE_EIMS_GPI_SDP1	IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
+#define IXGBE_EIMS_GPI_SDP2	IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */
+#define IXGBE_EIMS_ECC		IXGBE_EICR_ECC /* ECC Error */
+#define IXGBE_EIMS_PBUR		IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
+#define IXGBE_EIMS_DHER		IXGBE_EICR_DHER /* Descr Handler Error */
+#define IXGBE_EIMS_TCP_TIMER	IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EIMS_OTHER	IXGBE_EICR_OTHER /* INT Cause Active */
+
+/* Extended Interrupt Mask Clear */
+#define IXGBE_EIMC_RTX_QUEUE	IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EIMC_FLOW_DIR	IXGBE_EICR_FLOW_DIR /* FDir Exception */
+#define IXGBE_EIMC_RX_MISS	IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */
+#define IXGBE_EIMC_PCI		IXGBE_EICR_PCI /* PCI Exception */
+#define IXGBE_EIMC_MAILBOX	IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */
+#define IXGBE_EIMC_LSC		IXGBE_EICR_LSC /* Link Status Change */
+#define IXGBE_EIMC_MNG		IXGBE_EICR_MNG /* MNG Event Interrupt */
+#define IXGBE_EIMC_GPI_SDP0	IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */
+#define IXGBE_EIMC_GPI_SDP1	IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */
+#define IXGBE_EIMC_GPI_SDP2	IXGBE_EICR_GPI_SDP2  /* SDP2 Gen Purpose Int */
+#define IXGBE_EIMC_ECC		IXGBE_EICR_ECC /* ECC Error */
+#define IXGBE_EIMC_PBUR		IXGBE_EICR_PBUR /* Pkt Buf Handler Err */
+#define IXGBE_EIMC_DHER		IXGBE_EICR_DHER /* Desc Handler Err */
+#define IXGBE_EIMC_TCP_TIMER	IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EIMC_OTHER	IXGBE_EICR_OTHER /* INT Cause Active */
+
+#define IXGBE_EIMS_ENABLE_MASK ( \
+				IXGBE_EIMS_RTX_QUEUE	| \
+				IXGBE_EIMS_LSC		| \
+				IXGBE_EIMS_TCP_TIMER	| \
+				IXGBE_EIMS_OTHER)
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define IXGBE_IMIR_PORT_IM_EN	0x00010000  /* TCP port enable */
+#define IXGBE_IMIR_PORT_BP	0x00020000  /* TCP port check bypass */
+#define IXGBE_IMIREXT_SIZE_BP	0x00001000  /* Packet size bypass */
+#define IXGBE_IMIREXT_CTRL_URG	0x00002000  /* Check URG bit in header */
+#define IXGBE_IMIREXT_CTRL_ACK	0x00004000  /* Check ACK bit in header */
+#define IXGBE_IMIREXT_CTRL_PSH	0x00008000  /* Check PSH bit in header */
+#define IXGBE_IMIREXT_CTRL_RST	0x00010000  /* Check RST bit in header */
+#define IXGBE_IMIREXT_CTRL_SYN	0x00020000  /* Check SYN bit in header */
+#define IXGBE_IMIREXT_CTRL_FIN	0x00040000  /* Check FIN bit in header */
+#define IXGBE_IMIREXT_CTRL_BP	0x00080000  /* Bypass check of control bits */
+#define IXGBE_IMIR_SIZE_BP_82599	0x00001000 /* Packet size bypass */
+#define IXGBE_IMIR_CTRL_URG_82599	0x00002000 /* Check URG bit in header */
+#define IXGBE_IMIR_CTRL_ACK_82599	0x00004000 /* Check ACK bit in header */
+#define IXGBE_IMIR_CTRL_PSH_82599	0x00008000 /* Check PSH bit in header */
+#define IXGBE_IMIR_CTRL_RST_82599	0x00010000 /* Check RST bit in header */
+#define IXGBE_IMIR_CTRL_SYN_82599	0x00020000 /* Check SYN bit in header */
+#define IXGBE_IMIR_CTRL_FIN_82599	0x00040000 /* Check FIN bit in header */
+#define IXGBE_IMIR_CTRL_BP_82599	0x00080000 /* Bypass chk of ctrl bits */
+#define IXGBE_IMIR_LLI_EN_82599		0x00100000 /* Enables low latency Int */
+#define IXGBE_IMIR_RX_QUEUE_MASK_82599	0x0000007F /* Rx Queue Mask */
+#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599	21 /* Rx Queue Shift */
+#define IXGBE_IMIRVP_PRIORITY_MASK	0x00000007 /* VLAN priority mask */
+#define IXGBE_IMIRVP_PRIORITY_EN	0x00000008 /* VLAN priority enable */
+
+#define IXGBE_MAX_FTQF_FILTERS		128
+#define IXGBE_FTQF_PROTOCOL_MASK	0x00000003
+#define IXGBE_FTQF_PROTOCOL_TCP		0x00000000
+#define IXGBE_FTQF_PROTOCOL_UDP		0x00000001
+#define IXGBE_FTQF_PROTOCOL_SCTP	2
+#define IXGBE_FTQF_PRIORITY_MASK	0x00000007
+#define IXGBE_FTQF_PRIORITY_SHIFT	2
+#define IXGBE_FTQF_POOL_MASK		0x0000003F
+#define IXGBE_FTQF_POOL_SHIFT		8
+#define IXGBE_FTQF_5TUPLE_MASK_MASK	0x0000001F
+#define IXGBE_FTQF_5TUPLE_MASK_SHIFT	25
+#define IXGBE_FTQF_SOURCE_ADDR_MASK	0x1E
+#define IXGBE_FTQF_DEST_ADDR_MASK	0x1D
+#define IXGBE_FTQF_SOURCE_PORT_MASK	0x1B
+#define IXGBE_FTQF_DEST_PORT_MASK	0x17
+#define IXGBE_FTQF_PROTOCOL_COMP_MASK	0x0F
+#define IXGBE_FTQF_POOL_MASK_EN		0x40000000
+#define IXGBE_FTQF_QUEUE_ENABLE		0x80000000
+
+/* Interrupt clear mask */
+#define IXGBE_IRQ_CLEAR_MASK	0xFFFFFFFF
+
+/* Interrupt Vector Allocation Registers */
+#define IXGBE_IVAR_REG_NUM		25
+#define IXGBE_IVAR_REG_NUM_82599	64
+#define IXGBE_IVAR_TXRX_ENTRY		96
+#define IXGBE_IVAR_RX_ENTRY		64
+#define IXGBE_IVAR_RX_QUEUE(_i)		(0 + (_i))
+#define IXGBE_IVAR_TX_QUEUE(_i)		(64 + (_i))
+#define IXGBE_IVAR_TX_ENTRY		32
+
+#define IXGBE_IVAR_TCP_TIMER_INDEX	96 /* 0 based index */
+#define IXGBE_IVAR_OTHER_CAUSES_INDEX	97 /* 0 based index */
+
+#define IXGBE_MSIX_VECTOR(_i)		(0 + (_i))
+
+#define IXGBE_IVAR_ALLOC_VAL		0x80 /* Interrupt Allocation valid */
+
+/* ETYPE Queue Filter/Select Bit Masks */
+#define IXGBE_MAX_ETQF_FILTERS		8
+#define IXGBE_ETQF_FCOE			0x08000000 /* bit 27 */
+#define IXGBE_ETQF_BCN			0x10000000 /* bit 28 */
+#define IXGBE_ETQF_1588			0x40000000 /* bit 30 */
+#define IXGBE_ETQF_FILTER_EN		0x80000000 /* bit 31 */
+#define IXGBE_ETQF_POOL_ENABLE		(1 << 26) /* bit 26 */
+
+#define IXGBE_ETQS_RX_QUEUE		0x007F0000 /* bits 22:16 */
+#define IXGBE_ETQS_RX_QUEUE_SHIFT	16
+#define IXGBE_ETQS_LLI			0x20000000 /* bit 29 */
+#define IXGBE_ETQS_QUEUE_EN		0x80000000 /* bit 31 */
+
+/*
+ * ETQF filter list: one static filter per filter consumer. This is
+ *		   to avoid filter collisions later. Add new filters
+ *		   here!!
+ *
+ * Current filters:
+ *	EAPOL 802.1x (0x888e): Filter 0
+ *	FCoE (0x8906):	 Filter 2
+ *	1588 (0x88f7):	 Filter 3
+ *	FIP  (0x8914):	 Filter 4
+ */
+#define IXGBE_ETQF_FILTER_EAPOL		0
+#define IXGBE_ETQF_FILTER_FCOE		2
+#define IXGBE_ETQF_FILTER_1588		3
+#define IXGBE_ETQF_FILTER_FIP		4
+/* VLAN Control Bit Masks */
+#define IXGBE_VLNCTRL_VET		0x0000FFFF  /* bits 0-15 */
+#define IXGBE_VLNCTRL_CFI		0x10000000  /* bit 28 */
+#define IXGBE_VLNCTRL_CFIEN		0x20000000  /* bit 29 */
+#define IXGBE_VLNCTRL_VFE		0x40000000  /* bit 30 */
+#define IXGBE_VLNCTRL_VME		0x80000000  /* bit 31 */
+
+/* VLAN pool filtering masks */
+#define IXGBE_VLVF_VIEN			0x80000000  /* filter is valid */
+#define IXGBE_VLVF_ENTRIES		64
+#define IXGBE_VLVF_VLANID_MASK		0x00000FFF
+/* Per VF Port VLAN insertion rules */
+#define IXGBE_VMVIR_VLANA_DEFAULT	0x40000000 /* Always use default VLAN */
+#define IXGBE_VMVIR_VLANA_NEVER		0x80000000 /* Never insert VLAN tag */
+
+#define IXGBE_ETHERNET_IEEE_VLAN_TYPE	0x8100  /* 802.1q protocol */
+
+/* STATUS Bit Masks */
+#define IXGBE_STATUS_LAN_ID		0x0000000C /* LAN ID */
+#define IXGBE_STATUS_LAN_ID_SHIFT	2 /* LAN ID Shift*/
+#define IXGBE_STATUS_GIO		0x00080000 /* GIO Master Ena Status */
+
+#define IXGBE_STATUS_LAN_ID_0	0x00000000 /* LAN ID 0 */
+#define IXGBE_STATUS_LAN_ID_1	0x00000004 /* LAN ID 1 */
+
+/* ESDP Bit Masks */
+#define IXGBE_ESDP_SDP0		0x00000001 /* SDP0 Data Value */
+#define IXGBE_ESDP_SDP1		0x00000002 /* SDP1 Data Value */
+#define IXGBE_ESDP_SDP2		0x00000004 /* SDP2 Data Value */
+#define IXGBE_ESDP_SDP3		0x00000008 /* SDP3 Data Value */
+#define IXGBE_ESDP_SDP4		0x00000010 /* SDP4 Data Value */
+#define IXGBE_ESDP_SDP5		0x00000020 /* SDP5 Data Value */
+#define IXGBE_ESDP_SDP6		0x00000040 /* SDP6 Data Value */
+#define IXGBE_ESDP_SDP0_DIR	0x00000100 /* SDP0 IO direction */
+#define IXGBE_ESDP_SDP1_DIR	0x00000200 /* SDP1 IO direction */
+#define IXGBE_ESDP_SDP4_DIR	0x00001000 /* SDP4 IO direction */
+#define IXGBE_ESDP_SDP5_DIR	0x00002000 /* SDP5 IO direction */
+#define IXGBE_ESDP_SDP0_NATIVE	0x00010000 /* SDP0 IO mode */
+#define IXGBE_ESDP_SDP1_NATIVE	0x00020000 /* SDP1 IO mode */
+
+
+/* LEDCTL Bit Masks */
+#define IXGBE_LED_IVRT_BASE		0x00000040
+#define IXGBE_LED_BLINK_BASE		0x00000080
+#define IXGBE_LED_MODE_MASK_BASE	0x0000000F
+#define IXGBE_LED_OFFSET(_base, _i)	(_base << (8 * (_i)))
+#define IXGBE_LED_MODE_SHIFT(_i)	(8*(_i))
+#define IXGBE_LED_IVRT(_i)	IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
+#define IXGBE_LED_BLINK(_i)	IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
+#define IXGBE_LED_MODE_MASK(_i)	IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
+
+/* LED modes */
+#define IXGBE_LED_LINK_UP	0x0
+#define IXGBE_LED_LINK_10G	0x1
+#define IXGBE_LED_MAC		0x2
+#define IXGBE_LED_FILTER	0x3
+#define IXGBE_LED_LINK_ACTIVE	0x4
+#define IXGBE_LED_LINK_1G	0x5
+#define IXGBE_LED_ON		0xE
+#define IXGBE_LED_OFF		0xF
+
+/* AUTOC Bit Masks */
+#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000
+#define IXGBE_AUTOC_KX4_SUPP	0x80000000
+#define IXGBE_AUTOC_KX_SUPP	0x40000000
+#define IXGBE_AUTOC_PAUSE	0x30000000
+#define IXGBE_AUTOC_ASM_PAUSE	0x20000000
+#define IXGBE_AUTOC_SYM_PAUSE	0x10000000
+#define IXGBE_AUTOC_RF		0x08000000
+#define IXGBE_AUTOC_PD_TMR	0x06000000
+#define IXGBE_AUTOC_AN_RX_LOOSE	0x01000000
+#define IXGBE_AUTOC_AN_RX_DRIFT	0x00800000
+#define IXGBE_AUTOC_AN_RX_ALIGN	0x007C0000
+#define IXGBE_AUTOC_FECA	0x00040000
+#define IXGBE_AUTOC_FECR	0x00020000
+#define IXGBE_AUTOC_KR_SUPP	0x00010000
+#define IXGBE_AUTOC_AN_RESTART	0x00001000
+#define IXGBE_AUTOC_FLU		0x00000001
+#define IXGBE_AUTOC_LMS_SHIFT	13
+#define IXGBE_AUTOC_LMS_10G_SERIAL	(0x3 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR	(0x4 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_SGMII_1G_100M	(0x5 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN	(0x6 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII	(0x7 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_MASK		(0x7 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN	(0x0 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN	(0x1 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_1G_AN		(0x2 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_AN		(0x4 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN	(0x6 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_ATTACH_TYPE	(0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+
+#define IXGBE_AUTOC_1G_PMA_PMD_MASK	0x00000200
+#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT	9
+#define IXGBE_AUTOC_10G_PMA_PMD_MASK	0x00000180
+#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT	7
+#define IXGBE_AUTOC_10G_XAUI	(0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_10G_KX4	(0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_10G_CX4	(0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_BX	(0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_KX	(0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_SFI	(0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_KX_BX	(0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+
+#define IXGBE_AUTOC2_UPPER_MASK	0xFFFF0000
+#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK	0x00030000
+#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT	16
+#define IXGBE_AUTOC2_10G_KR	(0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_10G_XFI	(0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_10G_SFI	(0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+
+#define IXGBE_MACC_FLU		0x00000001
+#define IXGBE_MACC_FSV_10G	0x00030000
+#define IXGBE_MACC_FS		0x00040000
+#define IXGBE_MAC_RX2TX_LPBK	0x00000002
+
+/* LINKS Bit Masks */
+#define IXGBE_LINKS_KX_AN_COMP	0x80000000
+#define IXGBE_LINKS_UP		0x40000000
+#define IXGBE_LINKS_SPEED	0x20000000
+#define IXGBE_LINKS_MODE	0x18000000
+#define IXGBE_LINKS_RX_MODE	0x06000000
+#define IXGBE_LINKS_TX_MODE	0x01800000
+#define IXGBE_LINKS_XGXS_EN	0x00400000
+#define IXGBE_LINKS_SGMII_EN	0x02000000
+#define IXGBE_LINKS_PCS_1G_EN	0x00200000
+#define IXGBE_LINKS_1G_AN_EN	0x00100000
+#define IXGBE_LINKS_KX_AN_IDLE	0x00080000
+#define IXGBE_LINKS_1G_SYNC	0x00040000
+#define IXGBE_LINKS_10G_ALIGN	0x00020000
+#define IXGBE_LINKS_10G_LANE_SYNC	0x00017000
+#define IXGBE_LINKS_TL_FAULT		0x00001000
+#define IXGBE_LINKS_SIGNAL		0x00000F00
+
+#define IXGBE_LINKS_SPEED_82599		0x30000000
+#define IXGBE_LINKS_SPEED_10G_82599	0x30000000
+#define IXGBE_LINKS_SPEED_1G_82599	0x20000000
+#define IXGBE_LINKS_SPEED_100_82599	0x10000000
+#define IXGBE_LINK_UP_TIME		90 /* 9.0 Seconds */
+#define IXGBE_AUTO_NEG_TIME		45 /* 4.5 Seconds */
+
+#define IXGBE_LINKS2_AN_SUPPORTED	0x00000040
+
+/* PCS1GLSTA Bit Masks */
+#define IXGBE_PCS1GLSTA_LINK_OK		1
+#define IXGBE_PCS1GLSTA_SYNK_OK		0x10
+#define IXGBE_PCS1GLSTA_AN_COMPLETE	0x10000
+#define IXGBE_PCS1GLSTA_AN_PAGE_RX	0x20000
+#define IXGBE_PCS1GLSTA_AN_TIMED_OUT	0x40000
+#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT	0x80000
+#define IXGBE_PCS1GLSTA_AN_ERROR_RWS	0x100000
+
+#define IXGBE_PCS1GANA_SYM_PAUSE	0x80
+#define IXGBE_PCS1GANA_ASM_PAUSE	0x100
+
+/* PCS1GLCTL Bit Masks */
+#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN 0x00040000 /* PCS 1G autoneg to en */
+#define IXGBE_PCS1GLCTL_FLV_LINK_UP	1
+#define IXGBE_PCS1GLCTL_FORCE_LINK	0x20
+#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH	0x40
+#define IXGBE_PCS1GLCTL_AN_ENABLE	0x10000
+#define IXGBE_PCS1GLCTL_AN_RESTART	0x20000
+
+/* ANLP1 Bit Masks */
+#define IXGBE_ANLP1_PAUSE		0x0C00
+#define IXGBE_ANLP1_SYM_PAUSE		0x0400
+#define IXGBE_ANLP1_ASM_PAUSE		0x0800
+#define IXGBE_ANLP1_AN_STATE_MASK	0x000f0000
+
+/* SW Semaphore Register bitmasks */
+#define IXGBE_SWSM_SMBI		0x00000001 /* Driver Semaphore bit */
+#define IXGBE_SWSM_SWESMBI	0x00000002 /* FW Semaphore bit */
+#define IXGBE_SWSM_WMNG		0x00000004 /* Wake MNG Clock */
+#define IXGBE_SWFW_REGSMP	0x80000000 /* Register Semaphore bit 31 */
+
+/* SW_FW_SYNC/GSSR definitions */
+#define IXGBE_GSSR_EEP_SM	0x0001
+#define IXGBE_GSSR_PHY0_SM	0x0002
+#define IXGBE_GSSR_PHY1_SM	0x0004
+#define IXGBE_GSSR_MAC_CSR_SM	0x0008
+#define IXGBE_GSSR_FLASH_SM	0x0010
+#define IXGBE_GSSR_SW_MNG_SM	0x0400
+
+/* FW Status register bitmask */
+#define IXGBE_FWSTS_FWRI	0x00000200 /* Firmware Reset Indication */
+
+/* EEC Register */
+#define IXGBE_EEC_SK		0x00000001 /* EEPROM Clock */
+#define IXGBE_EEC_CS		0x00000002 /* EEPROM Chip Select */
+#define IXGBE_EEC_DI		0x00000004 /* EEPROM Data In */
+#define IXGBE_EEC_DO		0x00000008 /* EEPROM Data Out */
+#define IXGBE_EEC_FWE_MASK	0x00000030 /* FLASH Write Enable */
+#define IXGBE_EEC_FWE_DIS	0x00000010 /* Disable FLASH writes */
+#define IXGBE_EEC_FWE_EN	0x00000020 /* Enable FLASH writes */
+#define IXGBE_EEC_FWE_SHIFT	4
+#define IXGBE_EEC_REQ		0x00000040 /* EEPROM Access Request */
+#define IXGBE_EEC_GNT		0x00000080 /* EEPROM Access Grant */
+#define IXGBE_EEC_PRES		0x00000100 /* EEPROM Present */
+#define IXGBE_EEC_ARD		0x00000200 /* EEPROM Auto Read Done */
+#define IXGBE_EEC_FLUP		0x00800000 /* Flash update command */
+#define IXGBE_EEC_SEC1VAL	0x02000000 /* Sector 1 Valid */
+#define IXGBE_EEC_FLUDONE	0x04000000 /* Flash update done */
+/* EEPROM Addressing bits based on type (0-small, 1-large) */
+#define IXGBE_EEC_ADDR_SIZE	0x00000400
+#define IXGBE_EEC_SIZE		0x00007800 /* EEPROM Size */
+#define IXGBE_EERD_MAX_ADDR	0x00003FFF /* EERD alows 14 bits for addr. */
+
+#define IXGBE_EEC_SIZE_SHIFT		11
+#define IXGBE_EEPROM_WORD_SIZE_SHIFT	6
+#define IXGBE_EEPROM_OPCODE_BITS	8
+
+/* Part Number String Length */
+#define IXGBE_PBANUM_LENGTH	11
+
+/* Checksum and EEPROM pointers */
+#define IXGBE_PBANUM_PTR_GUARD	0xFAFA
+#define IXGBE_EEPROM_CHECKSUM	0x3F
+#define IXGBE_EEPROM_SUM	0xBABA
+#define IXGBE_PCIE_ANALOG_PTR	0x03
+#define IXGBE_ATLAS0_CONFIG_PTR	0x04
+#define IXGBE_PHY_PTR		0x04
+#define IXGBE_ATLAS1_CONFIG_PTR	0x05
+#define IXGBE_OPTION_ROM_PTR	0x05
+#define IXGBE_PCIE_GENERAL_PTR	0x06
+#define IXGBE_PCIE_CONFIG0_PTR	0x07
+#define IXGBE_PCIE_CONFIG1_PTR	0x08
+#define IXGBE_CORE0_PTR		0x09
+#define IXGBE_CORE1_PTR		0x0A
+#define IXGBE_MAC0_PTR		0x0B
+#define IXGBE_MAC1_PTR		0x0C
+#define IXGBE_CSR0_CONFIG_PTR	0x0D
+#define IXGBE_CSR1_CONFIG_PTR	0x0E
+#define IXGBE_FW_PTR		0x0F
+#define IXGBE_PBANUM0_PTR	0x15
+#define IXGBE_PBANUM1_PTR	0x16
+#define IXGBE_ALT_MAC_ADDR_PTR	0x37
+#define IXGBE_FREE_SPACE_PTR	0X3E
+
+/* External Thermal Sensor Config */
+#define IXGBE_ETS_CFG			0x26
+#define IXGBE_ETS_LTHRES_DELTA_MASK	0x07C0
+#define IXGBE_ETS_LTHRES_DELTA_SHIFT	6
+#define IXGBE_ETS_TYPE_MASK		0x0038
+#define IXGBE_ETS_TYPE_SHIFT		3
+#define IXGBE_ETS_TYPE_EMC		0x000
+#define IXGBE_ETS_NUM_SENSORS_MASK	0x0007
+#define IXGBE_ETS_DATA_LOC_MASK		0x3C00
+#define IXGBE_ETS_DATA_LOC_SHIFT	10
+#define IXGBE_ETS_DATA_INDEX_MASK	0x0300
+#define IXGBE_ETS_DATA_INDEX_SHIFT	8
+#define IXGBE_ETS_DATA_HTHRESH_MASK	0x00FF
+
+#define IXGBE_SAN_MAC_ADDR_PTR		0x28
+#define IXGBE_DEVICE_CAPS		0x2C
+#define IXGBE_SERIAL_NUMBER_MAC_ADDR	0x11
+#define IXGBE_PCIE_MSIX_82599_CAPS	0x72
+#define IXGBE_MAX_MSIX_VECTORS_82599	0x40
+#define IXGBE_PCIE_MSIX_82598_CAPS	0x62
+#define IXGBE_MAX_MSIX_VECTORS_82598	0x13
+
+/* MSI-X capability fields masks */
+#define IXGBE_PCIE_MSIX_TBL_SZ_MASK	0x7FF
+
+/* Legacy EEPROM word offsets */
+#define IXGBE_ISCSI_BOOT_CAPS		0x0033
+#define IXGBE_ISCSI_SETUP_PORT_0	0x0030
+#define IXGBE_ISCSI_SETUP_PORT_1	0x0034
+
+/* EEPROM Commands - SPI */
+#define IXGBE_EEPROM_MAX_RETRY_SPI	5000 /* Max wait 5ms for RDY signal */
+#define IXGBE_EEPROM_STATUS_RDY_SPI	0x01
+#define IXGBE_EEPROM_READ_OPCODE_SPI	0x03  /* EEPROM read opcode */
+#define IXGBE_EEPROM_WRITE_OPCODE_SPI	0x02  /* EEPROM write opcode */
+#define IXGBE_EEPROM_A8_OPCODE_SPI	0x08  /* opcode bit-3 = addr bit-8 */
+#define IXGBE_EEPROM_WREN_OPCODE_SPI	0x06  /* EEPROM set Write Ena latch */
+/* EEPROM reset Write Enable latch */
+#define IXGBE_EEPROM_WRDI_OPCODE_SPI	0x04
+#define IXGBE_EEPROM_RDSR_OPCODE_SPI	0x05  /* EEPROM read Status reg */
+#define IXGBE_EEPROM_WRSR_OPCODE_SPI	0x01  /* EEPROM write Status reg */
+#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI	0x20  /* EEPROM ERASE 4KB */
+#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI	0xD8  /* EEPROM ERASE 64KB */
+#define IXGBE_EEPROM_ERASE256_OPCODE_SPI	0xDB  /* EEPROM ERASE 256B */
+
+/* EEPROM Read Register */
+#define IXGBE_EEPROM_RW_REG_DATA	16 /* data offset in EEPROM read reg */
+#define IXGBE_EEPROM_RW_REG_DONE	2 /* Offset to READ done bit */
+#define IXGBE_EEPROM_RW_REG_START	1 /* First bit to start operation */
+#define IXGBE_EEPROM_RW_ADDR_SHIFT	2 /* Shift to the address bits */
+#define IXGBE_NVM_POLL_WRITE		1 /* Flag for polling for wr complete */
+#define IXGBE_NVM_POLL_READ		0 /* Flag for polling for rd complete */
+
+#define IXGBE_ETH_LENGTH_OF_ADDRESS	6
+
+#define IXGBE_EEPROM_PAGE_SIZE_MAX	128
+#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT	512 /* words rd in burst */
+#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT	256 /* words wr in burst */
+
+#ifndef IXGBE_EEPROM_GRANT_ATTEMPTS
+#define IXGBE_EEPROM_GRANT_ATTEMPTS	1000 /* EEPROM attempts to gain grant */
+#endif
+
+#ifndef IXGBE_EERD_EEWR_ATTEMPTS
+/* Number of 5 microseconds we wait for EERD read and
+ * EERW write to complete */
+#define IXGBE_EERD_EEWR_ATTEMPTS	100000
+#endif
+
+#ifndef IXGBE_FLUDONE_ATTEMPTS
+/* # attempts we wait for flush update to complete */
+#define IXGBE_FLUDONE_ATTEMPTS		20000
+#endif
+
+#define IXGBE_PCIE_CTRL2		0x5   /* PCIe Control 2 Offset */
+#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE	0x8   /* Dummy Function Enable */
+#define IXGBE_PCIE_CTRL2_LAN_DISABLE	0x2   /* LAN PCI Disable */
+#define IXGBE_PCIE_CTRL2_DISABLE_SELECT	0x1   /* LAN Disable Select */
+
+#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET		0x0
+#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET		0x3
+#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP		0x1
+#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS		0x2
+#define IXGBE_FW_LESM_PARAMETERS_PTR		0x2
+#define IXGBE_FW_LESM_STATE_1			0x1
+#define IXGBE_FW_LESM_STATE_ENABLED		0x8000 /* LESM Enable bit */
+#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR	0x4
+#define IXGBE_FW_PATCH_VERSION_4		0x7
+#define IXGBE_FCOE_IBA_CAPS_BLK_PTR		0x33 /* iSCSI/FCOE block */
+#define IXGBE_FCOE_IBA_CAPS_FCOE		0x20 /* FCOE flags */
+#define IXGBE_ISCSI_FCOE_BLK_PTR		0x17 /* iSCSI/FCOE block */
+#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET		0x0 /* FCOE flags */
+#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE		0x1 /* FCOE flags enable bit */
+#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR		0x27 /* Alt. SAN MAC block */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET	0x0 /* Alt SAN MAC capability */
+#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET	0x1 /* Alt SAN MAC 0 offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET	0x4 /* Alt SAN MAC 1 offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET	0x7 /* Alt WWNN prefix offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET	0x8 /* Alt WWPN prefix offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC	0x0 /* Alt SAN MAC exists */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN	0x1 /* Alt WWN base exists */
+
+#define IXGBE_DEVICE_CAPS_WOL_PORT0_1	0x4 /* WoL supported on ports 0 & 1 */
+#define IXGBE_DEVICE_CAPS_WOL_PORT0	0x8 /* WoL supported on port 0 */
+#define IXGBE_DEVICE_CAPS_WOL_MASK	0xC /* Mask for WoL capabilities */
+
+/* PCI Bus Info */
+#define IXGBE_PCI_DEVICE_STATUS		0xAA
+#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING	0x0020
+#define IXGBE_PCI_LINK_STATUS		0xB2
+#define IXGBE_PCI_DEVICE_CONTROL2	0xC8
+#define IXGBE_PCI_LINK_WIDTH		0x3F0
+#define IXGBE_PCI_LINK_WIDTH_1		0x10
+#define IXGBE_PCI_LINK_WIDTH_2		0x20
+#define IXGBE_PCI_LINK_WIDTH_4		0x40
+#define IXGBE_PCI_LINK_WIDTH_8		0x80
+#define IXGBE_PCI_LINK_SPEED		0xF
+#define IXGBE_PCI_LINK_SPEED_2500	0x1
+#define IXGBE_PCI_LINK_SPEED_5000	0x2
+#define IXGBE_PCI_LINK_SPEED_8000	0x3
+#define IXGBE_PCI_HEADER_TYPE_REGISTER	0x0E
+#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC	0x80
+#define IXGBE_PCI_DEVICE_CONTROL2_16ms	0x0005
+
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT	800
+
+/* Check whether address is multicast. This is little-endian specific check.*/
+#define IXGBE_IS_MULTICAST(Address) \
+		(bool)(((u8 *)(Address))[0] & ((u8)0x01))
+
+/* Check whether an address is broadcast. */
+#define IXGBE_IS_BROADCAST(Address) \
+		((((u8 *)(Address))[0] == ((u8)0xff)) && \
+		(((u8 *)(Address))[1] == ((u8)0xff)))
+
+/* RAH */
+#define IXGBE_RAH_VIND_MASK	0x003C0000
+#define IXGBE_RAH_VIND_SHIFT	18
+#define IXGBE_RAH_AV		0x80000000
+#define IXGBE_CLEAR_VMDQ_ALL	0xFFFFFFFF
+
+/* Header split receive */
+#define IXGBE_RFCTL_ISCSI_DIS		0x00000001
+#define IXGBE_RFCTL_ISCSI_DWC_MASK	0x0000003E
+#define IXGBE_RFCTL_ISCSI_DWC_SHIFT	1
+#define IXGBE_RFCTL_RSC_DIS		0x00000010
+#define IXGBE_RFCTL_NFSW_DIS		0x00000040
+#define IXGBE_RFCTL_NFSR_DIS		0x00000080
+#define IXGBE_RFCTL_NFS_VER_MASK	0x00000300
+#define IXGBE_RFCTL_NFS_VER_SHIFT	8
+#define IXGBE_RFCTL_NFS_VER_2		0
+#define IXGBE_RFCTL_NFS_VER_3		1
+#define IXGBE_RFCTL_NFS_VER_4		2
+#define IXGBE_RFCTL_IPV6_DIS		0x00000400
+#define IXGBE_RFCTL_IPV6_XSUM_DIS	0x00000800
+#define IXGBE_RFCTL_IPFRSP_DIS		0x00004000
+#define IXGBE_RFCTL_IPV6_EX_DIS		0x00010000
+#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS	0x00020000
+
+/* Transmit Config masks */
+#define IXGBE_TXDCTL_ENABLE		0x02000000 /* Ena specific Tx Queue */
+#define IXGBE_TXDCTL_SWFLSH		0x04000000 /* Tx Desc. wr-bk flushing */
+#define IXGBE_TXDCTL_WTHRESH_SHIFT	16 /* shift to WTHRESH bits */
+/* Enable short packet padding to 64 bytes */
+#define IXGBE_TX_PAD_ENABLE		0x00000400
+#define IXGBE_JUMBO_FRAME_ENABLE	0x00000004  /* Allow jumbo frames */
+/* This allows for 16K packets + 4k for vlan */
+#define IXGBE_MAX_FRAME_SZ		0x40040000
+
+#define IXGBE_TDWBAL_HEAD_WB_ENABLE	0x1 /* Tx head write-back enable */
+#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE	0x2 /* Tx seq# write-back enable */
+
+/* Receive Config masks */
+#define IXGBE_RXCTRL_RXEN		0x00000001 /* Enable Receiver */
+#define IXGBE_RXCTRL_DMBYPS		0x00000002 /* Desc Monitor Bypass */
+#define IXGBE_RXDCTL_ENABLE		0x02000000 /* Ena specific Rx Queue */
+#define IXGBE_RXDCTL_SWFLSH		0x04000000 /* Rx Desc wr-bk flushing */
+#define IXGBE_RXDCTL_RLPMLMASK		0x00003FFF /* X540 supported only */
+#define IXGBE_RXDCTL_RLPML_EN		0x00008000
+#define IXGBE_RXDCTL_VME		0x40000000 /* VLAN mode enable */
+
+#define IXGBE_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
+#define IXGBE_TSYNCTXCTL_ENABLED	0x00000010 /* Tx timestamping enabled */
+
+#define IXGBE_TSYNCRXCTL_VALID		0x00000001 /* Rx timestamp valid */
+#define IXGBE_TSYNCRXCTL_TYPE_MASK	0x0000000E /* Rx type mask */
+#define IXGBE_TSYNCRXCTL_TYPE_L2_V2	0x00
+#define IXGBE_TSYNCRXCTL_TYPE_L4_V1	0x02
+#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
+#define IXGBE_TSYNCRXCTL_ENABLED	0x00000010 /* Rx Timestamping enabled */
+
+#define IXGBE_RXMTRL_V1_CTRLT_MASK	0x000000FF
+#define IXGBE_RXMTRL_V1_SYNC_MSG	0x00
+#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG	0x01
+#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG	0x02
+#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG	0x03
+#define IXGBE_RXMTRL_V1_MGMT_MSG	0x04
+
+#define IXGBE_RXMTRL_V2_MSGID_MASK	0x0000FF00
+#define IXGBE_RXMTRL_V2_SYNC_MSG	0x0000
+#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG	0x0100
+#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG	0x0200
+#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG	0x0300
+#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG	0x0800
+#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG	0x0900
+#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00
+#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG	0x0B00
+#define IXGBE_RXMTRL_V2_SIGNALLING_MSG	0x0C00
+#define IXGBE_RXMTRL_V2_MGMT_MSG	0x0D00
+
+#define IXGBE_FCTRL_SBP		0x00000002 /* Store Bad Packet */
+#define IXGBE_FCTRL_MPE		0x00000100 /* Multicast Promiscuous Ena*/
+#define IXGBE_FCTRL_UPE		0x00000200 /* Unicast Promiscuous Ena */
+#define IXGBE_FCTRL_BAM		0x00000400 /* Broadcast Accept Mode */
+#define IXGBE_FCTRL_PMCF	0x00001000 /* Pass MAC Control Frames */
+#define IXGBE_FCTRL_DPF		0x00002000 /* Discard Pause Frame */
+/* Receive Priority Flow Control Enable */
+#define IXGBE_FCTRL_RPFCE	0x00004000
+#define IXGBE_FCTRL_RFCE	0x00008000 /* Receive Flow Control Ena */
+#define IXGBE_MFLCN_PMCF	0x00000001 /* Pass MAC Control Frames */
+#define IXGBE_MFLCN_DPF		0x00000002 /* Discard Pause Frame */
+#define IXGBE_MFLCN_RPFCE	0x00000004 /* Receive Priority FC Enable */
+#define IXGBE_MFLCN_RFCE	0x00000008 /* Receive FC Enable */
+#define IXGBE_MFLCN_RPFCE_MASK	0x00000FF4 /* Rx Priority FC bitmap mask */
+#define IXGBE_MFLCN_RPFCE_SHIFT	4 /* Rx Priority FC bitmap shift */
+
+/* Multiple Receive Queue Control */
+#define IXGBE_MRQC_RSSEN	0x00000001  /* RSS Enable */
+#define IXGBE_MRQC_MRQE_MASK	0xF /* Bits 3:0 */
+#define IXGBE_MRQC_RT8TCEN	0x00000002 /* 8 TC no RSS */
+#define IXGBE_MRQC_RT4TCEN	0x00000003 /* 4 TC no RSS */
+#define IXGBE_MRQC_RTRSS8TCEN	0x00000004 /* 8 TC w/ RSS */
+#define IXGBE_MRQC_RTRSS4TCEN	0x00000005 /* 4 TC w/ RSS */
+#define IXGBE_MRQC_VMDQEN	0x00000008 /* VMDq2 64 pools no RSS */
+#define IXGBE_MRQC_VMDQRSS32EN	0x0000000A /* VMDq2 32 pools w/ RSS */
+#define IXGBE_MRQC_VMDQRSS64EN	0x0000000B /* VMDq2 64 pools w/ RSS */
+#define IXGBE_MRQC_VMDQRT8TCEN	0x0000000C /* VMDq2/RT 16 pool 8 TC */
+#define IXGBE_MRQC_VMDQRT4TCEN	0x0000000D /* VMDq2/RT 32 pool 4 TC */
+#define IXGBE_MRQC_RSS_FIELD_MASK	0xFFFF0000
+#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP	0x00010000
+#define IXGBE_MRQC_RSS_FIELD_IPV4	0x00020000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX	0x00080000
+#define IXGBE_MRQC_RSS_FIELD_IPV6	0x00100000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP	0x00200000
+#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000
+#define IXGBE_MRQC_L3L4TXSWEN		0x00008000
+
+/* Queue Drop Enable */
+#define IXGBE_QDE_ENABLE	0x00000001
+#define IXGBE_QDE_IDX_MASK	0x00007F00
+#define IXGBE_QDE_IDX_SHIFT	8
+#define IXGBE_QDE_WRITE		0x00010000
+#define IXGBE_QDE_READ		0x00020000
+
+#define IXGBE_TXD_POPTS_IXSM	0x01 /* Insert IP checksum */
+#define IXGBE_TXD_POPTS_TXSM	0x02 /* Insert TCP/UDP checksum */
+#define IXGBE_TXD_CMD_EOP	0x01000000 /* End of Packet */
+#define IXGBE_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IXGBE_TXD_CMD_IC	0x04000000 /* Insert Checksum */
+#define IXGBE_TXD_CMD_RS	0x08000000 /* Report Status */
+#define IXGBE_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define IXGBE_TXD_CMD_VLE	0x40000000 /* Add VLAN tag */
+#define IXGBE_TXD_STAT_DD	0x00000001 /* Descriptor Done */
+
+#define IXGBE_RXDADV_IPSEC_STATUS_SECP		0x00020000
+#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
+#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH	0x10000000
+#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED	0x18000000
+#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK	0x18000000
+/* Multiple Transmit Queue Command Register */
+#define IXGBE_MTQC_RT_ENA	0x1 /* DCB Enable */
+#define IXGBE_MTQC_VT_ENA	0x2 /* VMDQ2 Enable */
+#define IXGBE_MTQC_64Q_1PB	0x0 /* 64 queues 1 pack buffer */
+#define IXGBE_MTQC_32VF		0x8 /* 4 TX Queues per pool w/32VF's */
+#define IXGBE_MTQC_64VF		0x4 /* 2 TX Queues per pool w/64VF's */
+#define IXGBE_MTQC_4TC_4TQ	0x8 /* 4 TC if RT_ENA and VT_ENA */
+#define IXGBE_MTQC_8TC_8TQ	0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */
+
+/* Receive Descriptor bit definitions */
+#define IXGBE_RXD_STAT_DD	0x01 /* Descriptor Done */
+#define IXGBE_RXD_STAT_EOP	0x02 /* End of Packet */
+#define IXGBE_RXD_STAT_FLM	0x04 /* FDir Match */
+#define IXGBE_RXD_STAT_VP	0x08 /* IEEE VLAN Packet */
+#define IXGBE_RXDADV_NEXTP_MASK	0x000FFFF0 /* Next Descriptor Index */
+#define IXGBE_RXDADV_NEXTP_SHIFT	0x00000004
+#define IXGBE_RXD_STAT_UDPCS	0x10 /* UDP xsum calculated */
+#define IXGBE_RXD_STAT_L4CS	0x20 /* L4 xsum calculated */
+#define IXGBE_RXD_STAT_IPCS	0x40 /* IP xsum calculated */
+#define IXGBE_RXD_STAT_PIF	0x80 /* passed in-exact filter */
+#define IXGBE_RXD_STAT_CRCV	0x100 /* Speculative CRC Valid */
+#define IXGBE_RXD_STAT_VEXT	0x200 /* 1st VLAN found */
+#define IXGBE_RXD_STAT_UDPV	0x400 /* Valid UDP checksum */
+#define IXGBE_RXD_STAT_DYNINT	0x800 /* Pkt caused INT via DYNINT */
+#define IXGBE_RXD_STAT_LLINT	0x800 /* Pkt caused Low Latency Interrupt */
+#define IXGBE_RXD_STAT_TS	0x10000 /* Time Stamp */
+#define IXGBE_RXD_STAT_SECP	0x20000 /* Security Processing */
+#define IXGBE_RXD_STAT_LB	0x40000 /* Loopback Status */
+#define IXGBE_RXD_STAT_ACK	0x8000 /* ACK Packet indication */
+#define IXGBE_RXD_ERR_CE	0x01 /* CRC Error */
+#define IXGBE_RXD_ERR_LE	0x02 /* Length Error */
+#define IXGBE_RXD_ERR_PE	0x08 /* Packet Error */
+#define IXGBE_RXD_ERR_OSE	0x10 /* Oversize Error */
+#define IXGBE_RXD_ERR_USE	0x20 /* Undersize Error */
+#define IXGBE_RXD_ERR_TCPE	0x40 /* TCP/UDP Checksum Error */
+#define IXGBE_RXD_ERR_IPE	0x80 /* IP Checksum Error */
+#define IXGBE_RXDADV_ERR_MASK		0xfff00000 /* RDESC.ERRORS mask */
+#define IXGBE_RXDADV_ERR_SHIFT		20 /* RDESC.ERRORS shift */
+#define IXGBE_RXDADV_ERR_RXE		0x20000000 /* Any MAC Error */
+#define IXGBE_RXDADV_ERR_FCEOFE		0x80000000 /* FCoEFe/IPE */
+#define IXGBE_RXDADV_ERR_FCERR		0x00700000 /* FCERR/FDIRERR */
+#define IXGBE_RXDADV_ERR_FDIR_LEN	0x00100000 /* FDIR Length error */
+#define IXGBE_RXDADV_ERR_FDIR_DROP	0x00200000 /* FDIR Drop error */
+#define IXGBE_RXDADV_ERR_FDIR_COLL	0x00400000 /* FDIR Collision error */
+#define IXGBE_RXDADV_ERR_HBO	0x00800000 /*Header Buffer Overflow */
+#define IXGBE_RXDADV_ERR_CE	0x01000000 /* CRC Error */
+#define IXGBE_RXDADV_ERR_LE	0x02000000 /* Length Error */
+#define IXGBE_RXDADV_ERR_PE	0x08000000 /* Packet Error */
+#define IXGBE_RXDADV_ERR_OSE	0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_USE	0x20000000 /* Undersize Error */
+#define IXGBE_RXDADV_ERR_TCPE	0x40000000 /* TCP/UDP Checksum Error */
+#define IXGBE_RXDADV_ERR_IPE	0x80000000 /* IP Checksum Error */
+#define IXGBE_RXD_VLAN_ID_MASK	0x0FFF  /* VLAN ID is in lower 12 bits */
+#define IXGBE_RXD_PRI_MASK	0xE000  /* Priority is in upper 3 bits */
+#define IXGBE_RXD_PRI_SHIFT	13
+#define IXGBE_RXD_CFI_MASK	0x1000  /* CFI is bit 12 */
+#define IXGBE_RXD_CFI_SHIFT	12
+
+#define IXGBE_RXDADV_STAT_DD		IXGBE_RXD_STAT_DD  /* Done */
+#define IXGBE_RXDADV_STAT_EOP		IXGBE_RXD_STAT_EOP /* End of Packet */
+#define IXGBE_RXDADV_STAT_FLM		IXGBE_RXD_STAT_FLM /* FDir Match */
+#define IXGBE_RXDADV_STAT_VP		IXGBE_RXD_STAT_VP  /* IEEE VLAN Pkt */
+#define IXGBE_RXDADV_STAT_MASK		0x000fffff /* Stat/NEXTP: bit 0-19 */
+#define IXGBE_RXDADV_STAT_FCEOFS	0x00000040 /* FCoE EOF/SOF Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT	0x00000030 /* FCoE Pkt Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH	0x00000000 /* 00: No Ctxt Match */
+#define IXGBE_RXDADV_STAT_FCSTAT_NODDP	0x00000010 /* 01: Ctxt w/o DDP */
+#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP	0x00000020 /* 10: Recv. FCP_RSP */
+#define IXGBE_RXDADV_STAT_FCSTAT_DDP	0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_TS		0x00010000 /* IEEE1588 Time Stamp */
+
+/* PSRTYPE bit definitions */
+#define IXGBE_PSRTYPE_TCPHDR	0x00000010
+#define IXGBE_PSRTYPE_UDPHDR	0x00000020
+#define IXGBE_PSRTYPE_IPV4HDR	0x00000100
+#define IXGBE_PSRTYPE_IPV6HDR	0x00000200
+#define IXGBE_PSRTYPE_L2HDR	0x00001000
+
+/* SRRCTL bit definitions */
+#define IXGBE_SRRCTL_BSIZEPKT_SHIFT	10 /* so many KBs */
+#define IXGBE_SRRCTL_RDMTS_SHIFT	22
+#define IXGBE_SRRCTL_RDMTS_MASK		0x01C00000
+#define IXGBE_SRRCTL_DROP_EN		0x10000000
+#define IXGBE_SRRCTL_BSIZEPKT_MASK	0x0000007F
+#define IXGBE_SRRCTL_BSIZEHDR_MASK	0x00003F00
+#define IXGBE_SRRCTL_DESCTYPE_LEGACY	0x00000000
+#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT	0x04000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
+#define IXGBE_SRRCTL_DESCTYPE_MASK	0x0E000000
+
+#define IXGBE_RXDPS_HDRSTAT_HDRSP	0x00008000
+#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK	0x000003FF
+
+#define IXGBE_RXDADV_RSSTYPE_MASK	0x0000000F
+#define IXGBE_RXDADV_PKTTYPE_MASK	0x0000FFF0
+#define IXGBE_RXDADV_PKTTYPE_MASK_EX	0x0001FFF0
+#define IXGBE_RXDADV_HDRBUFLEN_MASK	0x00007FE0
+#define IXGBE_RXDADV_RSCCNT_MASK	0x001E0000
+#define IXGBE_RXDADV_RSCCNT_SHIFT	17
+#define IXGBE_RXDADV_HDRBUFLEN_SHIFT	5
+#define IXGBE_RXDADV_SPLITHEADER_EN	0x00001000
+#define IXGBE_RXDADV_SPH		0x8000
+
+/* RSS Hash results */
+#define IXGBE_RXDADV_RSSTYPE_NONE	0x00000000
+#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP	0x00000001
+#define IXGBE_RXDADV_RSSTYPE_IPV4	0x00000002
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP	0x00000003
+#define IXGBE_RXDADV_RSSTYPE_IPV6_EX	0x00000004
+#define IXGBE_RXDADV_RSSTYPE_IPV6	0x00000005
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
+#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP	0x00000007
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP	0x00000008
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
+
+/* RSS Packet Types as indicated in the receive descriptor. */
+#define IXGBE_RXDADV_PKTTYPE_NONE	0x00000000
+#define IXGBE_RXDADV_PKTTYPE_IPV4	0x00000010 /* IPv4 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV4_EX	0x00000020 /* IPv4 hdr + extensions */
+#define IXGBE_RXDADV_PKTTYPE_IPV6	0x00000040 /* IPv6 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV6_EX	0x00000080 /* IPv6 hdr + extensions */
+#define IXGBE_RXDADV_PKTTYPE_TCP	0x00000100 /* TCP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_UDP	0x00000200 /* UDP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_SCTP	0x00000400 /* SCTP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_NFS	0x00000800 /* NFS hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP	0x00001000 /* IPSec ESP */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH	0x00002000 /* IPSec AH */
+#define IXGBE_RXDADV_PKTTYPE_LINKSEC	0x00004000 /* LinkSec Encap */
+#define IXGBE_RXDADV_PKTTYPE_ETQF	0x00008000 /* PKTTYPE is ETQF index */
+#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK	0x00000070 /* ETQF has 8 indices */
+#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT	4 /* Right-shift 4 bits */
+
+/* Security Processing bit Indication */
+#define IXGBE_RXDADV_LNKSEC_STATUS_SECP		0x00020000
+#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH	0x08000000
+#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR	0x10000000
+#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK	0x18000000
+#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG	0x18000000
+
+/* Masks to determine if packets should be dropped due to frame errors */
+#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
+				IXGBE_RXD_ERR_CE | \
+				IXGBE_RXD_ERR_LE | \
+				IXGBE_RXD_ERR_PE | \
+				IXGBE_RXD_ERR_OSE | \
+				IXGBE_RXD_ERR_USE)
+
+#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \
+				IXGBE_RXDADV_ERR_CE | \
+				IXGBE_RXDADV_ERR_LE | \
+				IXGBE_RXDADV_ERR_PE | \
+				IXGBE_RXDADV_ERR_OSE | \
+				IXGBE_RXDADV_ERR_USE)
+
+#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK_82599	IXGBE_RXDADV_ERR_RXE
+
+/* Multicast bit mask */
+#define IXGBE_MCSTCTRL_MFE	0x4
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE	8
+#define IXGBE_REQ_TX_BUFFER_GRANULARITY		1024
+
+/* Vlan-specific macros */
+#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK	0x0FFF /* VLAN ID in lower 12 bits */
+#define IXGBE_RX_DESC_SPECIAL_PRI_MASK	0xE000 /* Priority in upper 3 bits */
+#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT	0x000D /* Priority in upper 3 of 16 */
+#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT	IXGBE_RX_DESC_SPECIAL_PRI_SHIFT
+
+/* SR-IOV specific macros */
+#define IXGBE_MBVFICR_INDEX(vf_number)	(vf_number >> 4)
+#define IXGBE_MBVFICR(_i)		(0x00710 + ((_i) * 4))
+#define IXGBE_VFLRE(_i)			(((_i & 1) ? 0x001C0 : 0x00600))
+#define IXGBE_VFLREC(_i)		 (0x00700 + ((_i) * 4))
+/* Translated register #defines */
+#define IXGBE_PVFCTRL(P)	(0x00300 + (4 * (P)))
+#define IXGBE_PVFSTATUS(P)	(0x00008 + (0 * (P)))
+#define IXGBE_PVFLINKS(P)	(0x042A4 + (0 * (P)))
+#define IXGBE_PVFRTIMER(P)	(0x00048 + (0 * (P)))
+#define IXGBE_PVFMAILBOX(P)	(0x04C00 + (4 * (P)))
+#define IXGBE_PVFRXMEMWRAP(P)	(0x03190 + (0 * (P)))
+#define IXGBE_PVTEICR(P)	(0x00B00 + (4 * (P)))
+#define IXGBE_PVTEICS(P)	(0x00C00 + (4 * (P)))
+#define IXGBE_PVTEIMS(P)	(0x00D00 + (4 * (P)))
+#define IXGBE_PVTEIMC(P)	(0x00E00 + (4 * (P)))
+#define IXGBE_PVTEIAC(P)	(0x00F00 + (4 * (P)))
+#define IXGBE_PVTEIAM(P)	(0x04D00 + (4 * (P)))
+#define IXGBE_PVTEITR(P)	(((P) < 24) ? (0x00820 + ((P) * 4)) : \
+				 (0x012300 + (((P) - 24) * 4)))
+#define IXGBE_PVTIVAR(P)	(0x12500 + (4 * (P)))
+#define IXGBE_PVTIVAR_MISC(P)	(0x04E00 + (4 * (P)))
+#define IXGBE_PVTRSCINT(P)	(0x12000 + (4 * (P)))
+#define IXGBE_VFPBACL(P)	(0x110C8 + (4 * (P)))
+#define IXGBE_PVFRDBAL(P)	((P < 64) ? (0x01000 + (0x40 * (P))) \
+				 : (0x0D000 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDBAH(P)	((P < 64) ? (0x01004 + (0x40 * (P))) \
+				 : (0x0D004 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDLEN(P)	((P < 64) ? (0x01008 + (0x40 * (P))) \
+				 : (0x0D008 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDH(P)		((P < 64) ? (0x01010 + (0x40 * (P))) \
+				 : (0x0D010 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRDT(P)		((P < 64) ? (0x01018 + (0x40 * (P))) \
+				 : (0x0D018 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFRXDCTL(P)	((P < 64) ? (0x01028 + (0x40 * (P))) \
+				 : (0x0D028 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFSRRCTL(P)	((P < 64) ? (0x01014 + (0x40 * (P))) \
+				 : (0x0D014 + (0x40 * ((P) - 64))))
+#define IXGBE_PVFPSRTYPE(P)	(0x0EA00 + (4 * (P)))
+#define IXGBE_PVFTDBAL(P)	(0x06000 + (0x40 * (P)))
+#define IXGBE_PVFTDBAH(P)	(0x06004 + (0x40 * (P)))
+#define IXGBE_PVFTTDLEN(P)	(0x06008 + (0x40 * (P)))
+#define IXGBE_PVFTDH(P)		(0x06010 + (0x40 * (P)))
+#define IXGBE_PVFTDT(P)		(0x06018 + (0x40 * (P)))
+#define IXGBE_PVFTXDCTL(P)	(0x06028 + (0x40 * (P)))
+#define IXGBE_PVFTDWBAL(P)	(0x06038 + (0x40 * (P)))
+#define IXGBE_PVFTDWBAH(P)	(0x0603C + (0x40 * (P)))
+#define IXGBE_PVFDCA_RXCTRL(P)	(((P) < 64) ? (0x0100C + (0x40 * (P))) \
+				 : (0x0D00C + (0x40 * ((P) - 64))))
+#define IXGBE_PVFDCA_TXCTRL(P)	(0x0600C + (0x40 * (P)))
+#define IXGBE_PVFGPRC(x)	(0x0101C + (0x40 * (x)))
+#define IXGBE_PVFGPTC(x)	(0x08300 + (0x04 * (x)))
+#define IXGBE_PVFGORC_LSB(x)	(0x01020 + (0x40 * (x)))
+#define IXGBE_PVFGORC_MSB(x)	(0x0D020 + (0x40 * (x)))
+#define IXGBE_PVFGOTC_LSB(x)	(0x08400 + (0x08 * (x)))
+#define IXGBE_PVFGOTC_MSB(x)	(0x08404 + (0x08 * (x)))
+#define IXGBE_PVFMPRC(x)	(0x0D01C + (0x40 * (x)))
+
+#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \
+		(IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index)))
+#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \
+		(IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index)))
+
+/* Little Endian defines */
+#ifndef __le16
+#define __le16  u16
+#endif
+#ifndef __le32
+#define __le32  u32
+#endif
+#ifndef __le64
+#define __le64  u64
+
+#endif
+#ifndef __be16
+/* Big Endian defines */
+#define __be16  u16
+#define __be32  u32
+#define __be64  u64
+
+#endif
+enum ixgbe_fdir_pballoc_type {
+	IXGBE_FDIR_PBALLOC_NONE = 0,
+	IXGBE_FDIR_PBALLOC_64K  = 1,
+	IXGBE_FDIR_PBALLOC_128K = 2,
+	IXGBE_FDIR_PBALLOC_256K = 3,
+};
+
+/* Flow Director register values */
+#define IXGBE_FDIRCTRL_PBALLOC_64K		0x00000001
+#define IXGBE_FDIRCTRL_PBALLOC_128K		0x00000002
+#define IXGBE_FDIRCTRL_PBALLOC_256K		0x00000003
+#define IXGBE_FDIRCTRL_INIT_DONE		0x00000008
+#define IXGBE_FDIRCTRL_PERFECT_MATCH		0x00000010
+#define IXGBE_FDIRCTRL_REPORT_STATUS		0x00000020
+#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS	0x00000080
+#define IXGBE_FDIRCTRL_DROP_Q_SHIFT		8
+#define IXGBE_FDIRCTRL_FLEX_SHIFT		16
+#define IXGBE_FDIRCTRL_SEARCHLIM		0x00800000
+#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT		24
+#define IXGBE_FDIRCTRL_FULL_THRESH_MASK		0xF0000000
+#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT	28
+
+#define IXGBE_FDIRTCPM_DPORTM_SHIFT		16
+#define IXGBE_FDIRUDPM_DPORTM_SHIFT		16
+#define IXGBE_FDIRIP6M_DIPM_SHIFT		16
+#define IXGBE_FDIRM_VLANID			0x00000001
+#define IXGBE_FDIRM_VLANP			0x00000002
+#define IXGBE_FDIRM_POOL			0x00000004
+#define IXGBE_FDIRM_L4P				0x00000008
+#define IXGBE_FDIRM_FLEX			0x00000010
+#define IXGBE_FDIRM_DIPv6			0x00000020
+
+#define IXGBE_FDIRFREE_FREE_MASK		0xFFFF
+#define IXGBE_FDIRFREE_FREE_SHIFT		0
+#define IXGBE_FDIRFREE_COLL_MASK		0x7FFF0000
+#define IXGBE_FDIRFREE_COLL_SHIFT		16
+#define IXGBE_FDIRLEN_MAXLEN_MASK		0x3F
+#define IXGBE_FDIRLEN_MAXLEN_SHIFT		0
+#define IXGBE_FDIRLEN_MAXHASH_MASK		0x7FFF0000
+#define IXGBE_FDIRLEN_MAXHASH_SHIFT		16
+#define IXGBE_FDIRUSTAT_ADD_MASK		0xFFFF
+#define IXGBE_FDIRUSTAT_ADD_SHIFT		0
+#define IXGBE_FDIRUSTAT_REMOVE_MASK		0xFFFF0000
+#define IXGBE_FDIRUSTAT_REMOVE_SHIFT		16
+#define IXGBE_FDIRFSTAT_FADD_MASK		0x00FF
+#define IXGBE_FDIRFSTAT_FADD_SHIFT		0
+#define IXGBE_FDIRFSTAT_FREMOVE_MASK		0xFF00
+#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT		8
+#define IXGBE_FDIRPORT_DESTINATION_SHIFT	16
+#define IXGBE_FDIRVLAN_FLEX_SHIFT		16
+#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT	15
+#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT	16
+
+#define IXGBE_FDIRCMD_CMD_MASK			0x00000003
+#define IXGBE_FDIRCMD_CMD_ADD_FLOW		0x00000001
+#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW		0x00000002
+#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT	0x00000003
+#define IXGBE_FDIRCMD_FILTER_VALID		0x00000004
+#define IXGBE_FDIRCMD_FILTER_UPDATE		0x00000008
+#define IXGBE_FDIRCMD_IPv6DMATCH		0x00000010
+#define IXGBE_FDIRCMD_L4TYPE_UDP		0x00000020
+#define IXGBE_FDIRCMD_L4TYPE_TCP		0x00000040
+#define IXGBE_FDIRCMD_L4TYPE_SCTP		0x00000060
+#define IXGBE_FDIRCMD_IPV6			0x00000080
+#define IXGBE_FDIRCMD_CLEARHT			0x00000100
+#define IXGBE_FDIRCMD_DROP			0x00000200
+#define IXGBE_FDIRCMD_INT			0x00000400
+#define IXGBE_FDIRCMD_LAST			0x00000800
+#define IXGBE_FDIRCMD_COLLISION			0x00001000
+#define IXGBE_FDIRCMD_QUEUE_EN			0x00008000
+#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT		5
+#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT		16
+#define IXGBE_FDIRCMD_VT_POOL_SHIFT		24
+#define IXGBE_FDIR_INIT_DONE_POLL		10
+#define IXGBE_FDIRCMD_CMD_POLL			10
+
+#define IXGBE_FDIR_DROP_QUEUE			127
+
+#define IXGBE_STATUS_OVERHEATING_BIT		20 /* STATUS overtemp bit num */
+
+/* Manageablility Host Interface defines */
+#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH	1792 /* Num of bytes in range */
+#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH	448 /* Num of dwords in range */
+#define IXGBE_HI_COMMAND_TIMEOUT	500 /* Process HI command limit */
+
+/* CEM Support */
+#define FW_CEM_HDR_LEN			0x4
+#define FW_CEM_CMD_DRIVER_INFO		0xDD
+#define FW_CEM_CMD_DRIVER_INFO_LEN	0x5
+#define FW_CEM_CMD_RESERVED		0X0
+#define FW_CEM_UNUSED_VER		0x0
+#define FW_CEM_MAX_RETRIES		3
+#define FW_CEM_RESP_STATUS_SUCCESS	0x1
+
+/* Host Interface Command Structures */
+
+struct ixgbe_hic_hdr {
+	u8 cmd;
+	u8 buf_len;
+	union {
+		u8 cmd_resv;
+		u8 ret_status;
+	} cmd_or_resp;
+	u8 checksum;
+};
+
+struct ixgbe_hic_drv_info {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_num;
+	u8 ver_sub;
+	u8 ver_build;
+	u8 ver_min;
+	u8 ver_maj;
+	u8 pad; /* end spacing to ensure length is mult. of dword */
+	u16 pad2; /* end spacing to ensure length is mult. of dword2 */
+};
+
+/* Transmit Descriptor - Legacy */
+struct ixgbe_legacy_tx_desc {
+	u64 buffer_addr; /* Address of the descriptor's data buffer */
+	union {
+		__le32 data;
+		struct {
+			__le16 length; /* Data buffer length */
+			u8 cso; /* Checksum offset */
+			u8 cmd; /* Descriptor control */
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status; /* Descriptor status */
+			u8 css; /* Checksum start */
+			__le16 vlan;
+		} fields;
+	} upper;
+};
+
+/* Transmit Descriptor - Advanced */
+union ixgbe_adv_tx_desc {
+	struct {
+		__le64 buffer_addr; /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd; /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Receive Descriptor - Legacy */
+struct ixgbe_legacy_rx_desc {
+	__le64 buffer_addr; /* Address of the descriptor's data buffer */
+	__le16 length; /* Length of data DMAed into data buffer */
+	__le16 csum; /* Packet checksum */
+	u8 status;   /* Descriptor status */
+	u8 errors;   /* Descriptor Errors */
+	__le16 vlan;
+};
+
+/* Receive Descriptor - Advanced */
+union ixgbe_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /* RSS, Pkt type */
+					__le16 hdr_info; /* Splithdr, hdrlen */
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+/* Context descriptors */
+struct ixgbe_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 seqnum_seed;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IXGBE_ADVTXD_DTALEN_MASK	0x0000FFFF /* Data buf length(bytes) */
+#define IXGBE_ADVTXD_MAC_LINKSEC	0x00040000 /* Insert LinkSec */
+#define IXGBE_ADVTXD_MAC_TSTAMP		0x00080000 /* IEEE1588 time stamp */
+#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */
+#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK	0x000001FF /* IPSec ESP length */
+#define IXGBE_ADVTXD_DTYP_MASK		0x00F00000 /* DTYP mask */
+#define IXGBE_ADVTXD_DTYP_CTXT		0x00200000 /* Adv Context Desc */
+#define IXGBE_ADVTXD_DTYP_DATA		0x00300000 /* Adv Data Descriptor */
+#define IXGBE_ADVTXD_DCMD_EOP		IXGBE_TXD_CMD_EOP  /* End of Packet */
+#define IXGBE_ADVTXD_DCMD_IFCS		IXGBE_TXD_CMD_IFCS /* Insert FCS */
+#define IXGBE_ADVTXD_DCMD_RS		IXGBE_TXD_CMD_RS /* Report Status */
+#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI	0x10000000 /* DDP hdr type or iSCSI */
+#define IXGBE_ADVTXD_DCMD_DEXT		IXGBE_TXD_CMD_DEXT /* Desc ext 1=Adv */
+#define IXGBE_ADVTXD_DCMD_VLE		IXGBE_TXD_CMD_VLE  /* VLAN pkt enable */
+#define IXGBE_ADVTXD_DCMD_TSE		0x80000000 /* TCP Seg enable */
+#define IXGBE_ADVTXD_STAT_DD		IXGBE_TXD_STAT_DD  /* Descriptor Done */
+#define IXGBE_ADVTXD_STAT_SN_CRC	0x00000002 /* NXTSEQ/SEED pres in WB */
+#define IXGBE_ADVTXD_STAT_RSV		0x0000000C /* STA Reserved */
+#define IXGBE_ADVTXD_IDX_SHIFT		4 /* Adv desc Index shift */
+#define IXGBE_ADVTXD_CC			0x00000080 /* Check Context */
+#define IXGBE_ADVTXD_POPTS_SHIFT	8  /* Adv desc POPTS shift */
+#define IXGBE_ADVTXD_POPTS_IXSM		(IXGBE_TXD_POPTS_IXSM << \
+					 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_TXSM		(IXGBE_TXD_POPTS_TXSM << \
+					 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_ISCO_1ST	0x00000000 /* 1st TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_MDL	0x00000800 /* Middle TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_LAST	0x00001000 /* Last TSO of iSCSI PDU */
+/* 1st&Last TSO-full iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_FULL	0x00001800
+#define IXGBE_ADVTXD_POPTS_RSV		0x00002000 /* POPTS Reserved */
+#define IXGBE_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
+#define IXGBE_ADVTXD_MACLEN_SHIFT	9  /* Adv ctxt desc mac len shift */
+#define IXGBE_ADVTXD_VLAN_SHIFT		16  /* Adv ctxt vlan tag shift */
+#define IXGBE_ADVTXD_TUCMD_IPV4		0x00000400 /* IP Packet Type: 1=IPv4 */
+#define IXGBE_ADVTXD_TUCMD_IPV6		0x00000000 /* IP Packet Type: 0=IPv6 */
+#define IXGBE_ADVTXD_TUCMD_L4T_UDP	0x00000000 /* L4 Packet TYPE of UDP */
+#define IXGBE_ADVTXD_TUCMD_L4T_TCP	0x00000800 /* L4 Packet TYPE of TCP */
+#define IXGBE_ADVTXD_TUCMD_L4T_SCTP	0x00001000 /* L4 Packet TYPE of SCTP */
+#define IXGBE_ADVTXD_TUCMD_MKRREQ	0x00002000 /* req Markers and CRC */
+#define IXGBE_ADVTXD_POPTS_IPSEC	0x00000400 /* IPSec offload request */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */
+#define IXGBE_ADVTXT_TUCMD_FCOE		0x00008000 /* FCoE Frame Type */
+#define IXGBE_ADVTXD_FCOEF_EOF_MASK	(0x3 << 10) /* FC EOF index */
+#define IXGBE_ADVTXD_FCOEF_SOF		((1 << 2) << 10) /* FC SOF index */
+#define IXGBE_ADVTXD_FCOEF_PARINC	((1 << 3) << 10) /* Rel_Off in F_CTL */
+#define IXGBE_ADVTXD_FCOEF_ORIE		((1 << 4) << 10) /* Orientation End */
+#define IXGBE_ADVTXD_FCOEF_ORIS		((1 << 5) << 10) /* Orientation Start */
+#define IXGBE_ADVTXD_FCOEF_EOF_N	(0x0 << 10) /* 00: EOFn */
+#define IXGBE_ADVTXD_FCOEF_EOF_T	(0x1 << 10) /* 01: EOFt */
+#define IXGBE_ADVTXD_FCOEF_EOF_NI	(0x2 << 10) /* 10: EOFni */
+#define IXGBE_ADVTXD_FCOEF_EOF_A	(0x3 << 10) /* 11: EOFa */
+#define IXGBE_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define IXGBE_ADVTXD_MSS_SHIFT		16  /* Adv ctxt MSS shift */
+
+/* Autonegotiation advertised speeds */
+typedef u32 ixgbe_autoneg_advertised;
+/* Link speed */
+typedef u32 ixgbe_link_speed;
+#define IXGBE_LINK_SPEED_UNKNOWN	0
+#define IXGBE_LINK_SPEED_100_FULL	0x0008
+#define IXGBE_LINK_SPEED_1GB_FULL	0x0020
+#define IXGBE_LINK_SPEED_10GB_FULL	0x0080
+#define IXGBE_LINK_SPEED_82598_AUTONEG	(IXGBE_LINK_SPEED_1GB_FULL | \
+					 IXGBE_LINK_SPEED_10GB_FULL)
+#define IXGBE_LINK_SPEED_82599_AUTONEG	(IXGBE_LINK_SPEED_100_FULL | \
+					 IXGBE_LINK_SPEED_1GB_FULL | \
+					 IXGBE_LINK_SPEED_10GB_FULL)
+
+
+/* Physical layer type */
+typedef u32 ixgbe_physical_layer;
+#define IXGBE_PHYSICAL_LAYER_UNKNOWN		0
+#define IXGBE_PHYSICAL_LAYER_10GBASE_T		0x0001
+#define IXGBE_PHYSICAL_LAYER_1000BASE_T		0x0002
+#define IXGBE_PHYSICAL_LAYER_100BASE_TX		0x0004
+#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU	0x0008
+#define IXGBE_PHYSICAL_LAYER_10GBASE_LR		0x0010
+#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM	0x0020
+#define IXGBE_PHYSICAL_LAYER_10GBASE_SR		0x0040
+#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4	0x0080
+#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4	0x0100
+#define IXGBE_PHYSICAL_LAYER_1000BASE_KX	0x0200
+#define IXGBE_PHYSICAL_LAYER_1000BASE_BX	0x0400
+#define IXGBE_PHYSICAL_LAYER_10GBASE_KR		0x0800
+#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI	0x1000
+#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA	0x2000
+#define IXGBE_PHYSICAL_LAYER_1000BASE_SX	0x4000
+
+/* Flow Control Data Sheet defined values
+ * Calculation and defines taken from 802.1bb Annex O
+ */
+
+/* BitTimes (BT) conversion */
+#define IXGBE_BT2KB(BT)		((BT + (8 * 1024 - 1)) / (8 * 1024))
+#define IXGBE_B2BT(BT)		(BT * 8)
+
+/* Calculate Delay to respond to PFC */
+#define IXGBE_PFC_D	672
+
+/* Calculate Cable Delay */
+#define IXGBE_CABLE_DC	5556 /* Delay Copper */
+#define IXGBE_CABLE_DO	5000 /* Delay Optical */
+
+/* Calculate Interface Delay X540 */
+#define IXGBE_PHY_DC	25600 /* Delay 10G BASET */
+#define IXGBE_MAC_DC	8192  /* Delay Copper XAUI interface */
+#define IXGBE_XAUI_DC	(2 * 2048) /* Delay Copper Phy */
+
+#define IXGBE_ID_X540	(IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC)
+
+/* Calculate Interface Delay 82598, 82599 */
+#define IXGBE_PHY_D	12800
+#define IXGBE_MAC_D	4096
+#define IXGBE_XAUI_D	(2 * 1024)
+
+#define IXGBE_ID	(IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D)
+
+/* Calculate Delay incurred from higher layer */
+#define IXGBE_HD	6144
+
+/* Calculate PCI Bus delay for low thresholds */
+#define IXGBE_PCI_DELAY	10000
+
+/* Calculate X540 delay value in bit times */
+#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \
+			((36 * \
+			  (IXGBE_B2BT(_max_frame_link) + \
+			   IXGBE_PFC_D + \
+			   (2 * IXGBE_CABLE_DC) + \
+			   (2 * IXGBE_ID_X540) + \
+			   IXGBE_HD) / 25 + 1) + \
+			 2 * IXGBE_B2BT(_max_frame_tc))
+
+/* Calculate 82599, 82598 delay value in bit times */
+#define IXGBE_DV(_max_frame_link, _max_frame_tc) \
+			((36 * \
+			  (IXGBE_B2BT(_max_frame_link) + \
+			   IXGBE_PFC_D + \
+			   (2 * IXGBE_CABLE_DC) + \
+			   (2 * IXGBE_ID) + \
+			   IXGBE_HD) / 25 + 1) + \
+			 2 * IXGBE_B2BT(_max_frame_tc))
+
+/* Calculate low threshold delay values */
+#define IXGBE_LOW_DV_X540(_max_frame_tc) \
+			(2 * IXGBE_B2BT(_max_frame_tc) + \
+			(36 * IXGBE_PCI_DELAY / 25) + 1)
+#define IXGBE_LOW_DV(_max_frame_tc) \
+			(2 * IXGBE_LOW_DV_X540(_max_frame_tc))
+
+/* Software ATR hash keys */
+#define IXGBE_ATR_BUCKET_HASH_KEY	0x3DAD14E2
+#define IXGBE_ATR_SIGNATURE_HASH_KEY	0x174D3614
+
+/* Software ATR input stream values and masks */
+#define IXGBE_ATR_HASH_MASK		0x7fff
+#define IXGBE_ATR_L4TYPE_MASK		0x3
+#define IXGBE_ATR_L4TYPE_UDP		0x1
+#define IXGBE_ATR_L4TYPE_TCP		0x2
+#define IXGBE_ATR_L4TYPE_SCTP		0x3
+#define IXGBE_ATR_L4TYPE_IPV6_MASK	0x4
+enum ixgbe_atr_flow_type {
+	IXGBE_ATR_FLOW_TYPE_IPV4	= 0x0,
+	IXGBE_ATR_FLOW_TYPE_UDPV4	= 0x1,
+	IXGBE_ATR_FLOW_TYPE_TCPV4	= 0x2,
+	IXGBE_ATR_FLOW_TYPE_SCTPV4	= 0x3,
+	IXGBE_ATR_FLOW_TYPE_IPV6	= 0x4,
+	IXGBE_ATR_FLOW_TYPE_UDPV6	= 0x5,
+	IXGBE_ATR_FLOW_TYPE_TCPV6	= 0x6,
+	IXGBE_ATR_FLOW_TYPE_SCTPV6	= 0x7,
+};
+
+/* Flow Director ATR input struct. */
+union ixgbe_atr_input {
+	/*
+	 * Byte layout in order, all values with MSB first:
+	 *
+	 * vm_pool	- 1 byte
+	 * flow_type	- 1 byte
+	 * vlan_id	- 2 bytes
+	 * src_ip	- 16 bytes
+	 * dst_ip	- 16 bytes
+	 * src_port	- 2 bytes
+	 * dst_port	- 2 bytes
+	 * flex_bytes	- 2 bytes
+	 * bkt_hash	- 2 bytes
+	 */
+	struct {
+		u8 vm_pool;
+		u8 flow_type;
+		__be16 vlan_id;
+		__be32 dst_ip[4];
+		__be32 src_ip[4];
+		__be16 src_port;
+		__be16 dst_port;
+		__be16 flex_bytes;
+		__be16 bkt_hash;
+	} formatted;
+	__be32 dword_stream[11];
+};
+
+/* Flow Director compressed ATR hash input struct */
+union ixgbe_atr_hash_dword {
+	struct {
+		u8 vm_pool;
+		u8 flow_type;
+		__be16 vlan_id;
+	} formatted;
+	__be32 ip;
+	struct {
+		__be16 src;
+		__be16 dst;
+	} port;
+	__be16 flex_bytes;
+	__be32 dword;
+};
+
+
+/*
+ * Unavailable: The FCoE Boot Option ROM is not present in the flash.
+ * Disabled: Present; boot order is not set for any targets on the port.
+ * Enabled: Present; boot order is set for at least one target on the port.
+ */
+enum ixgbe_fcoe_boot_status {
+	ixgbe_fcoe_bootstatus_disabled = 0,
+	ixgbe_fcoe_bootstatus_enabled = 1,
+	ixgbe_fcoe_bootstatus_unavailable = 0xFFFF
+};
+
+enum ixgbe_eeprom_type {
+	ixgbe_eeprom_uninitialized = 0,
+	ixgbe_eeprom_spi,
+	ixgbe_flash,
+	ixgbe_eeprom_none /* No NVM support */
+};
+
+enum ixgbe_mac_type {
+	ixgbe_mac_unknown = 0,
+	ixgbe_mac_82598EB,
+	ixgbe_mac_82599EB,
+	ixgbe_mac_X540,
+	ixgbe_num_macs
+};
+
+enum ixgbe_phy_type {
+	ixgbe_phy_unknown = 0,
+	ixgbe_phy_none,
+	ixgbe_phy_tn,
+	ixgbe_phy_aq,
+	ixgbe_phy_cu_unknown,
+	ixgbe_phy_qt,
+	ixgbe_phy_xaui,
+	ixgbe_phy_nl,
+	ixgbe_phy_sfp_passive_tyco,
+	ixgbe_phy_sfp_passive_unknown,
+	ixgbe_phy_sfp_active_unknown,
+	ixgbe_phy_sfp_avago,
+	ixgbe_phy_sfp_ftl,
+	ixgbe_phy_sfp_ftl_active,
+	ixgbe_phy_sfp_unknown,
+	ixgbe_phy_sfp_intel,
+	ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/
+	ixgbe_phy_generic
+};
+
+/*
+ * SFP+ module type IDs:
+ *
+ * ID	Module Type
+ * =============
+ * 0	SFP_DA_CU
+ * 1	SFP_SR
+ * 2	SFP_LR
+ * 3	SFP_DA_CU_CORE0 - 82599-specific
+ * 4	SFP_DA_CU_CORE1 - 82599-specific
+ * 5	SFP_SR/LR_CORE0 - 82599-specific
+ * 6	SFP_SR/LR_CORE1 - 82599-specific
+ */
+enum ixgbe_sfp_type {
+	ixgbe_sfp_type_da_cu = 0,
+	ixgbe_sfp_type_sr = 1,
+	ixgbe_sfp_type_lr = 2,
+	ixgbe_sfp_type_da_cu_core0 = 3,
+	ixgbe_sfp_type_da_cu_core1 = 4,
+	ixgbe_sfp_type_srlr_core0 = 5,
+	ixgbe_sfp_type_srlr_core1 = 6,
+	ixgbe_sfp_type_da_act_lmt_core0 = 7,
+	ixgbe_sfp_type_da_act_lmt_core1 = 8,
+	ixgbe_sfp_type_1g_cu_core0 = 9,
+	ixgbe_sfp_type_1g_cu_core1 = 10,
+	ixgbe_sfp_type_1g_sx_core0 = 11,
+	ixgbe_sfp_type_1g_sx_core1 = 12,
+	ixgbe_sfp_type_not_present = 0xFFFE,
+	ixgbe_sfp_type_unknown = 0xFFFF
+};
+
+enum ixgbe_media_type {
+	ixgbe_media_type_unknown = 0,
+	ixgbe_media_type_fiber,
+	ixgbe_media_type_fiber_qsfp,
+	ixgbe_media_type_fiber_lco,
+	ixgbe_media_type_copper,
+	ixgbe_media_type_backplane,
+	ixgbe_media_type_cx4,
+	ixgbe_media_type_virtual
+};
+
+/* Flow Control Settings */
+enum ixgbe_fc_mode {
+	ixgbe_fc_none = 0,
+	ixgbe_fc_rx_pause,
+	ixgbe_fc_tx_pause,
+	ixgbe_fc_full,
+	ixgbe_fc_default
+};
+
+/* Smart Speed Settings */
+#define IXGBE_SMARTSPEED_MAX_RETRIES	3
+enum ixgbe_smart_speed {
+	ixgbe_smart_speed_auto = 0,
+	ixgbe_smart_speed_on,
+	ixgbe_smart_speed_off
+};
+
+/* PCI bus types */
+enum ixgbe_bus_type {
+	ixgbe_bus_type_unknown = 0,
+	ixgbe_bus_type_pci,
+	ixgbe_bus_type_pcix,
+	ixgbe_bus_type_pci_express,
+	ixgbe_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum ixgbe_bus_speed {
+	ixgbe_bus_speed_unknown	= 0,
+	ixgbe_bus_speed_33	= 33,
+	ixgbe_bus_speed_66	= 66,
+	ixgbe_bus_speed_100	= 100,
+	ixgbe_bus_speed_120	= 120,
+	ixgbe_bus_speed_133	= 133,
+	ixgbe_bus_speed_2500	= 2500,
+	ixgbe_bus_speed_5000	= 5000,
+	ixgbe_bus_speed_8000	= 8000,
+	ixgbe_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum ixgbe_bus_width {
+	ixgbe_bus_width_unknown	= 0,
+	ixgbe_bus_width_pcie_x1	= 1,
+	ixgbe_bus_width_pcie_x2	= 2,
+	ixgbe_bus_width_pcie_x4	= 4,
+	ixgbe_bus_width_pcie_x8	= 8,
+	ixgbe_bus_width_32	= 32,
+	ixgbe_bus_width_64	= 64,
+	ixgbe_bus_width_reserved
+};
+
+struct ixgbe_addr_filter_info {
+	u32 num_mc_addrs;
+	u32 rar_used_count;
+	u32 mta_in_use;
+	u32 overflow_promisc;
+	bool user_set_promisc;
+};
+
+/* Bus parameters */
+struct ixgbe_bus_info {
+	enum ixgbe_bus_speed speed;
+	enum ixgbe_bus_width width;
+	enum ixgbe_bus_type type;
+
+	u16 func;
+	u16 lan_id;
+};
+
+/* Flow control parameters */
+struct ixgbe_fc_info {
+	u32 high_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl High-water */
+	u32 low_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl Low-water */
+	u16 pause_time; /* Flow Control Pause timer */
+	bool send_xon; /* Flow control send XON */
+	bool strict_ieee; /* Strict IEEE mode */
+	bool disable_fc_autoneg; /* Do not autonegotiate FC */
+	bool fc_was_autonegged; /* Is current_mode the result of autonegging? */
+	enum ixgbe_fc_mode current_mode; /* FC mode in effect */
+	enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+/* Statistics counters collected by the MAC */
+struct ixgbe_hw_stats {
+	u64 crcerrs;
+	u64 illerrc;
+	u64 errbc;
+	u64 mspdc;
+	u64 mpctotal;
+	u64 mpc[8];
+	u64 mlfc;
+	u64 mrfc;
+	u64 rlec;
+	u64 lxontxc;
+	u64 lxonrxc;
+	u64 lxofftxc;
+	u64 lxoffrxc;
+	u64 pxontxc[8];
+	u64 pxonrxc[8];
+	u64 pxofftxc[8];
+	u64 pxoffrxc[8];
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc[8];
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mngprc;
+	u64 mngpdc;
+	u64 mngptc;
+	u64 tor;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 xec;
+	u64 qprc[16];
+	u64 qptc[16];
+	u64 qbrc[16];
+	u64 qbtc[16];
+	u64 qprdc[16];
+	u64 pxon2offc[8];
+	u64 fdirustat_add;
+	u64 fdirustat_remove;
+	u64 fdirfstat_fadd;
+	u64 fdirfstat_fremove;
+	u64 fdirmatch;
+	u64 fdirmiss;
+	u64 fccrc;
+	u64 fclast;
+	u64 fcoerpdc;
+	u64 fcoeprc;
+	u64 fcoeptc;
+	u64 fcoedwrc;
+	u64 fcoedwtc;
+	u64 fcoe_noddp;
+	u64 fcoe_noddp_ext_buff;
+	u64 ldpcec;
+	u64 pcrc8ec;
+	u64 b2ospc;
+	u64 b2ogprc;
+	u64 o2bgptc;
+	u64 o2bspc;
+};
+
+/* forward declaration */
+struct ixgbe_hw;
+
+/* iterator type for walking multicast address lists */
+typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr,
+				  u32 *vmdq);
+
+/* Function pointer table */
+struct ixgbe_eeprom_operations {
+	s32 (*init_params)(struct ixgbe_hw *);
+	s32 (*read)(struct ixgbe_hw *, u16, u16 *);
+	s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+	s32 (*write)(struct ixgbe_hw *, u16, u16);
+	s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+	s32 (*validate_checksum)(struct ixgbe_hw *, u16 *);
+	s32 (*update_checksum)(struct ixgbe_hw *);
+	u16 (*calc_checksum)(struct ixgbe_hw *);
+};
+
+struct ixgbe_mac_operations {
+	s32 (*init_hw)(struct ixgbe_hw *);
+	s32 (*reset_hw)(struct ixgbe_hw *);
+	s32 (*start_hw)(struct ixgbe_hw *);
+	s32 (*clear_hw_cntrs)(struct ixgbe_hw *);
+	enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
+	u32 (*get_supported_physical_layer)(struct ixgbe_hw *);
+	s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *);
+	s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
+	s32 (*set_san_mac_addr)(struct ixgbe_hw *, u8 *);
+	s32 (*get_device_caps)(struct ixgbe_hw *, u16 *);
+	s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *);
+	s32 (*get_fcoe_boot_status)(struct ixgbe_hw *, u16 *);
+	s32 (*stop_adapter)(struct ixgbe_hw *);
+	s32 (*get_bus_info)(struct ixgbe_hw *);
+	void (*set_lan_id)(struct ixgbe_hw *);
+	s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*);
+	s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8);
+	s32 (*setup_sfp)(struct ixgbe_hw *);
+	s32 (*enable_rx_dma)(struct ixgbe_hw *, u32);
+	s32 (*disable_sec_rx_path)(struct ixgbe_hw *);
+	s32 (*enable_sec_rx_path)(struct ixgbe_hw *);
+	s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16);
+	void (*release_swfw_sync)(struct ixgbe_hw *, u16);
+
+	/* Link */
+	void (*disable_tx_laser)(struct ixgbe_hw *);
+	void (*enable_tx_laser)(struct ixgbe_hw *);
+	void (*flap_tx_laser)(struct ixgbe_hw *);
+	s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool);
+	s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
+	s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
+				     bool *);
+
+	/* Packet Buffer manipulation */
+	void (*setup_rxpba)(struct ixgbe_hw *, int, u32, int);
+
+	/* LED */
+	s32 (*led_on)(struct ixgbe_hw *, u32);
+	s32 (*led_off)(struct ixgbe_hw *, u32);
+	s32 (*blink_led_start)(struct ixgbe_hw *, u32);
+	s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
+
+	/* RAR, Multicast, VLAN */
+	s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
+	s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *);
+	s32 (*clear_rar)(struct ixgbe_hw *, u32);
+	s32 (*insert_mac_addr)(struct ixgbe_hw *, u8 *, u32);
+	s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
+	s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
+	s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
+	s32 (*init_rx_addrs)(struct ixgbe_hw *);
+	s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32,
+				   ixgbe_mc_addr_itr);
+	s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32,
+				   ixgbe_mc_addr_itr, bool clear);
+	s32 (*enable_mc)(struct ixgbe_hw *);
+	s32 (*disable_mc)(struct ixgbe_hw *);
+	s32 (*clear_vfta)(struct ixgbe_hw *);
+	s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool);
+	s32 (*set_vlvf)(struct ixgbe_hw *, u32, u32, bool, bool *);
+	s32 (*init_uta_tables)(struct ixgbe_hw *);
+	void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
+	void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
+
+	/* Flow Control */
+	s32 (*fc_enable)(struct ixgbe_hw *);
+
+	/* Manageability interface */
+	s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
+	s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
+	s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
+};
+
+struct ixgbe_phy_operations {
+	s32 (*identify)(struct ixgbe_hw *);
+	s32 (*identify_sfp)(struct ixgbe_hw *);
+	s32 (*init)(struct ixgbe_hw *);
+	s32 (*reset)(struct ixgbe_hw *);
+	s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *);
+	s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16);
+	s32 (*setup_link)(struct ixgbe_hw *);
+	s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool,
+				bool);
+	s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *);
+	s32 (*get_firmware_version)(struct ixgbe_hw *, u16 *);
+	s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *);
+	s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8);
+	s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
+	s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
+	void (*i2c_bus_clear)(struct ixgbe_hw *);
+	s32 (*check_overtemp)(struct ixgbe_hw *);
+};
+
+struct ixgbe_eeprom_info {
+	struct ixgbe_eeprom_operations ops;
+	enum ixgbe_eeprom_type type;
+	u32 semaphore_delay;
+	u16 word_size;
+	u16 address_bits;
+	u16 word_page_size;
+};
+
+#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED	0x01
+struct ixgbe_mac_info {
+	struct ixgbe_mac_operations ops;
+	enum ixgbe_mac_type type;
+	u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+	u8 perm_addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+	u8 san_addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+	/* prefix for World Wide Node Name (WWNN) */
+	u16 wwnn_prefix;
+	/* prefix for World Wide Port Name (WWPN) */
+	u16 wwpn_prefix;
+#define IXGBE_MAX_MTA			128
+	u32 mta_shadow[IXGBE_MAX_MTA];
+	s32 mc_filter_type;
+	u32 mcft_size;
+	u32 vft_size;
+	u32 num_rar_entries;
+	u32 rar_highwater;
+	u32 rx_pb_size;
+	u32 max_tx_queues;
+	u32 max_rx_queues;
+	u32 orig_autoc;
+	u8  san_mac_rar_index;
+	u32 orig_autoc2;
+	u16 max_msix_vectors;
+	bool arc_subsystem_valid;
+	bool orig_link_settings_stored;
+	bool autotry_restart;
+	u8 flags;
+	struct ixgbe_thermal_sensor_data  thermal_sensor_data;
+};
+
+struct ixgbe_phy_info {
+	struct ixgbe_phy_operations ops;
+	enum ixgbe_phy_type type;
+	u32 addr;
+	u32 id;
+	enum ixgbe_sfp_type sfp_type;
+	bool sfp_setup_needed;
+	u32 revision;
+	enum ixgbe_media_type media_type;
+	bool reset_disable;
+	ixgbe_autoneg_advertised autoneg_advertised;
+	enum ixgbe_smart_speed smart_speed;
+	bool smart_speed_active;
+	bool multispeed_fiber;
+	bool reset_if_overtemp;
+	bool qsfp_shared_i2c_bus;
+};
+
+#include "ixgbe_mbx.h"
+
+struct ixgbe_mbx_operations {
+	void (*init_params)(struct ixgbe_hw *hw);
+	s32  (*read)(struct ixgbe_hw *, u32 *, u16,  u16);
+	s32  (*write)(struct ixgbe_hw *, u32 *, u16, u16);
+	s32  (*read_posted)(struct ixgbe_hw *, u32 *, u16,  u16);
+	s32  (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16);
+	s32  (*check_for_msg)(struct ixgbe_hw *, u16);
+	s32  (*check_for_ack)(struct ixgbe_hw *, u16);
+	s32  (*check_for_rst)(struct ixgbe_hw *, u16);
+};
+
+struct ixgbe_mbx_stats {
+	u32 msgs_tx;
+	u32 msgs_rx;
+
+	u32 acks;
+	u32 reqs;
+	u32 rsts;
+};
+
+struct ixgbe_mbx_info {
+	struct ixgbe_mbx_operations ops;
+	struct ixgbe_mbx_stats stats;
+	u32 timeout;
+	u32 udelay;
+	u32 v2p_mailbox;
+	u16 size;
+};
+
+struct ixgbe_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+	struct ixgbe_mac_info mac;
+	struct ixgbe_addr_filter_info addr_ctrl;
+	struct ixgbe_fc_info fc;
+	struct ixgbe_phy_info phy;
+	struct ixgbe_eeprom_info eeprom;
+	struct ixgbe_bus_info bus;
+	struct ixgbe_mbx_info mbx;
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+	bool adapter_stopped;
+	bool force_full_reset;
+	bool allow_unsupported_sfp;
+};
+
+#define ixgbe_call_func(hw, func, params, error) \
+		(func != NULL) ? func params : error
+
+
+/* Error Codes */
+#define IXGBE_ERR_EEPROM			-1
+#define IXGBE_ERR_EEPROM_CHECKSUM		-2
+#define IXGBE_ERR_PHY				-3
+#define IXGBE_ERR_CONFIG			-4
+#define IXGBE_ERR_PARAM				-5
+#define IXGBE_ERR_MAC_TYPE			-6
+#define IXGBE_ERR_UNKNOWN_PHY			-7
+#define IXGBE_ERR_LINK_SETUP			-8
+#define IXGBE_ERR_ADAPTER_STOPPED		-9
+#define IXGBE_ERR_INVALID_MAC_ADDR		-10
+#define IXGBE_ERR_DEVICE_NOT_SUPPORTED		-11
+#define IXGBE_ERR_MASTER_REQUESTS_PENDING	-12
+#define IXGBE_ERR_INVALID_LINK_SETTINGS		-13
+#define IXGBE_ERR_AUTONEG_NOT_COMPLETE		-14
+#define IXGBE_ERR_RESET_FAILED			-15
+#define IXGBE_ERR_SWFW_SYNC			-16
+#define IXGBE_ERR_PHY_ADDR_INVALID		-17
+#define IXGBE_ERR_I2C				-18
+#define IXGBE_ERR_SFP_NOT_SUPPORTED		-19
+#define IXGBE_ERR_SFP_NOT_PRESENT		-20
+#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT	-21
+#define IXGBE_ERR_NO_SAN_ADDR_PTR		-22
+#define IXGBE_ERR_FDIR_REINIT_FAILED		-23
+#define IXGBE_ERR_EEPROM_VERSION		-24
+#define IXGBE_ERR_NO_SPACE			-25
+#define IXGBE_ERR_OVERTEMP			-26
+#define IXGBE_ERR_FC_NOT_NEGOTIATED		-27
+#define IXGBE_ERR_FC_NOT_SUPPORTED		-28
+#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE	-30
+#define IXGBE_ERR_PBA_SECTION			-31
+#define IXGBE_ERR_INVALID_ARGUMENT		-32
+#define IXGBE_ERR_HOST_INTERFACE_COMMAND	-33
+#define IXGBE_ERR_OUT_OF_MEM			-34
+
+#define IXGBE_NOT_IMPLEMENTED			0x7FFFFFFF
+
+#define UNREFERENCED_XPARAMETER
+
+#endif /* _IXGBE_TYPE_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
new file mode 100644
index 00000000..b99d9e84
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
@@ -0,0 +1,937 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe_x540.h"
+#include "ixgbe_type.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw);
+static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw);
+static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw);
+static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw);
+
+/**
+ *  ixgbe_init_ops_X540 - Inits func ptrs and MAC type
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the function pointers and assign the MAC type for X540.
+ *  Does not touch the hardware.
+ **/
+s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	s32 ret_val;
+
+	ret_val = ixgbe_init_phy_ops_generic(hw);
+	ret_val = ixgbe_init_ops_generic(hw);
+
+
+	/* EEPROM */
+	eeprom->ops.init_params = &ixgbe_init_eeprom_params_X540;
+	eeprom->ops.read = &ixgbe_read_eerd_X540;
+	eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_X540;
+	eeprom->ops.write = &ixgbe_write_eewr_X540;
+	eeprom->ops.write_buffer = &ixgbe_write_eewr_buffer_X540;
+	eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_X540;
+	eeprom->ops.validate_checksum = &ixgbe_validate_eeprom_checksum_X540;
+	eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_X540;
+
+	/* PHY */
+	phy->ops.init = &ixgbe_init_phy_ops_generic;
+	phy->ops.reset = NULL;
+
+	/* MAC */
+	mac->ops.reset_hw = &ixgbe_reset_hw_X540;
+	mac->ops.get_media_type = &ixgbe_get_media_type_X540;
+	mac->ops.get_supported_physical_layer =
+				    &ixgbe_get_supported_physical_layer_X540;
+	mac->ops.read_analog_reg8 = NULL;
+	mac->ops.write_analog_reg8 = NULL;
+	mac->ops.start_hw = &ixgbe_start_hw_X540;
+	mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic;
+	mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic;
+	mac->ops.get_device_caps = &ixgbe_get_device_caps_generic;
+	mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic;
+	mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic;
+	mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync_X540;
+	mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync_X540;
+	mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic;
+	mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic;
+
+	/* RAR, Multicast, VLAN */
+	mac->ops.set_vmdq = &ixgbe_set_vmdq_generic;
+	mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic;
+	mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic;
+	mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic;
+	mac->rar_highwater = 1;
+	mac->ops.set_vfta = &ixgbe_set_vfta_generic;
+	mac->ops.set_vlvf = &ixgbe_set_vlvf_generic;
+	mac->ops.clear_vfta = &ixgbe_clear_vfta_generic;
+	mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic;
+	mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing;
+	mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing;
+
+	/* Link */
+	mac->ops.get_link_capabilities =
+				&ixgbe_get_copper_link_capabilities_generic;
+	mac->ops.setup_link = &ixgbe_setup_mac_link_X540;
+	mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic;
+	mac->ops.check_link = &ixgbe_check_mac_link_generic;
+
+	mac->mcft_size		= 128;
+	mac->vft_size		= 128;
+	mac->num_rar_entries	= 128;
+	mac->rx_pb_size		= 384;
+	mac->max_tx_queues	= 128;
+	mac->max_rx_queues	= 128;
+	mac->max_msix_vectors	= ixgbe_get_pcie_msix_count_generic(hw);
+
+	/*
+	 * FWSM register
+	 * ARC supported; valid only if manageability features are
+	 * enabled.
+	 */
+	mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) &
+				   IXGBE_FWSM_MODE_MASK) ? true : false;
+
+	//hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf;
+
+	/* LEDs */
+	mac->ops.blink_led_start = ixgbe_blink_led_start_X540;
+	mac->ops.blink_led_stop = ixgbe_blink_led_stop_X540;
+
+	/* Manageability interface */
+	mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic;
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_link_capabilities_X540 - Determines link capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @autoneg: true when autoneg or autotry is enabled
+ *
+ *  Determines the link capabilities by reading the AUTOC register.
+ **/
+s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw,
+				     ixgbe_link_speed *speed,
+				     bool *autoneg)
+{
+	ixgbe_get_copper_link_capabilities_generic(hw, speed, autoneg);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_media_type_X540 - Get media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ **/
+enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw)
+{
+	return ixgbe_media_type_copper;
+}
+
+/**
+ *  ixgbe_setup_mac_link_X540 - Sets the auto advertised capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg: true if autonegotiation enabled
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ **/
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw,
+			      ixgbe_link_speed speed, bool autoneg,
+			      bool autoneg_wait_to_complete)
+{
+	return hw->phy.ops.setup_link_speed(hw, speed, autoneg,
+					    autoneg_wait_to_complete);
+}
+
+/**
+ *  ixgbe_reset_hw_X540 - Perform hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks
+ *  and clears all interrupts, and perform a reset.
+ **/
+s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+
+	/*
+	 * Userland DPDK takes the ownershiop of device
+	 * Kernel driver here used as the simple path for ethtool only
+	 * Won't real reset device anyway
+	 */
+#if 0
+	u32 ctrl, i;
+
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status != 0)
+		goto reset_hw_out;
+
+	/* flush pending Tx transactions */
+	ixgbe_clear_tx_pending(hw);
+
+mac_reset_top:
+	ctrl = IXGBE_CTRL_RST;
+	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Poll for reset bit to self-clear indicating reset is complete */
+	for (i = 0; i < 10; i++) {
+		udelay(1);
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST_MASK))
+			break;
+	}
+
+	if (ctrl & IXGBE_CTRL_RST_MASK) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+	msleep(100);
+
+	/*
+	 * Double resets are required for recovery from certain error
+	 * conditions.  Between resets, it is necessary to stall to allow time
+	 * for any pending HW events to complete.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	/* Set the Rx packet buffer size. */
+	IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT);
+
+#endif
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/*
+	 * Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table.  Also reset num_rar_entries to 128,
+	 * since we modify this value when programming the SAN MAC address.
+	 */
+	hw->mac.num_rar_entries = 128;
+	hw->mac.ops.init_rx_addrs(hw);
+
+	/* Store the permanent SAN mac address */
+	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
+
+	/* Add the SAN MAC address to the RAR only if it's a valid address */
+	if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) {
+		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+		/* Save the SAN MAC RAR index */
+		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
+		/* Reserve the last RAR for the SAN MAC address */
+		hw->mac.num_rar_entries--;
+	}
+
+	/* Store the alternative WWNN/WWPN prefix */
+	hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
+				   &hw->mac.wwpn_prefix);
+
+//reset_hw_out:
+	return status;
+}
+
+/**
+ *  ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function
+ *  and the generation start_hw function.
+ *  Then performs revision-specific operations, if any.
+ **/
+s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+
+	ret_val = ixgbe_start_hw_generic(hw);
+	if (ret_val != 0)
+		goto out;
+
+	ret_val = ixgbe_start_hw_gen2(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_supported_physical_layer_X540 - Returns physical layer type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw)
+{
+	u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+	u16 ext_ability = 0;
+
+	hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+	IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability);
+	if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+		physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+	if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+		physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+	if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY)
+		physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX;
+
+	return physical_layer;
+}
+
+/**
+ *  ixgbe_init_eeprom_params_X540 - Initialize EEPROM params
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ *  ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	u32 eec;
+	u16 eeprom_size;
+
+	if (eeprom->type == ixgbe_eeprom_uninitialized) {
+		eeprom->semaphore_delay = 10;
+		eeprom->type = ixgbe_flash;
+
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+		eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+				    IXGBE_EEC_SIZE_SHIFT);
+		eeprom->word_size = 1 << (eeprom_size +
+					  IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+		hw_dbg(hw, "Eeprom params: type = %d, size = %d\n",
+			  eeprom->type, eeprom->word_size);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_eerd_X540- Read EEPROM word using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+	s32 status = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+	    0)
+		status = ixgbe_read_eerd_generic(hw, offset, data);
+	else
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_read_eerd_buffer_X540- Read EEPROM word(s) using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @words: number of words
+ *  @data: word(s) read from the EEPROM
+ *
+ *  Reads a 16 bit word(s) from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
+				u16 offset, u16 words, u16 *data)
+{
+	s32 status = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+	    0)
+		status = ixgbe_read_eerd_buffer_generic(hw, offset,
+							words, data);
+	else
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_write_eewr_X540 - Write EEPROM word using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @data: word write to the EEPROM
+ *
+ *  Write a 16 bit word to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	s32 status = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+	    0)
+		status = ixgbe_write_eewr_generic(hw, offset, data);
+	else
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @words: number of words
+ *  @data: word(s) write to the EEPROM
+ *
+ *  Write a 16 bit word(s) to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
+				 u16 offset, u16 words, u16 *data)
+{
+	s32 status = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+	    0)
+		status = ixgbe_write_eewr_buffer_generic(hw, offset,
+							 words, data);
+	else
+		status = IXGBE_ERR_SWFW_SYNC;
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum
+ *
+ *  This function does not use synchronization for EERD and EEWR. It can
+ *  be used internally by function which utilize ixgbe_acquire_swfw_sync_X540.
+ *
+ *  @hw: pointer to hardware structure
+ **/
+u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
+{
+	u16 i;
+	u16 j;
+	u16 checksum = 0;
+	u16 length = 0;
+	u16 pointer = 0;
+	u16 word = 0;
+
+	/*
+	 * Do not use hw->eeprom.ops.read because we do not want to take
+	 * the synchronization semaphores here. Instead use
+	 * ixgbe_read_eerd_generic
+	 */
+
+	/* Include 0x0-0x3F in the checksum */
+	for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
+		if (ixgbe_read_eerd_generic(hw, i, &word) != 0) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			break;
+		}
+		checksum += word;
+	}
+
+	/*
+	 * Include all data from pointers 0x3, 0x6-0xE.  This excludes the
+	 * FW, PHY module, and PCIe Expansion/Option ROM pointers.
+	 */
+	for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
+		if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR)
+			continue;
+
+		if (ixgbe_read_eerd_generic(hw, i, &pointer) != 0) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			break;
+		}
+
+		/* Skip pointer section if the pointer is invalid. */
+		if (pointer == 0xFFFF || pointer == 0 ||
+		    pointer >= hw->eeprom.word_size)
+			continue;
+
+		if (ixgbe_read_eerd_generic(hw, pointer, &length) !=
+		    0) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			break;
+		}
+
+		/* Skip pointer section if length is invalid. */
+		if (length == 0xFFFF || length == 0 ||
+		    (pointer + length) >= hw->eeprom.word_size)
+			continue;
+
+		for (j = pointer+1; j <= pointer+length; j++) {
+			if (ixgbe_read_eerd_generic(hw, j, &word) !=
+			    0) {
+				hw_dbg(hw, "EEPROM read failed\n");
+				break;
+			}
+			checksum += word;
+		}
+	}
+
+	checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+	return checksum;
+}
+
+/**
+ *  ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum_val: calculated checksum
+ *
+ *  Performs checksum calculation and validates the EEPROM checksum.  If the
+ *  caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw,
+					u16 *checksum_val)
+{
+	s32 status;
+	u16 checksum;
+	u16 read_checksum = 0;
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+	if (status != 0) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		goto out;
+	}
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+	    0) {
+		checksum = hw->eeprom.ops.calc_checksum(hw);
+
+		/*
+		 * Do not use hw->eeprom.ops.read because we do not want to take
+		 * the synchronization semaphores twice here.
+		*/
+		ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM,
+					&read_checksum);
+
+		/*
+		 * Verify read checksum from EEPROM is the same as
+		 * calculated checksum
+		 */
+		if (read_checksum != checksum)
+			status = IXGBE_ERR_EEPROM_CHECKSUM;
+
+		/* If the user cares, return the calculated checksum */
+		if (checksum_val)
+			*checksum_val = checksum;
+	} else {
+		status = IXGBE_ERR_SWFW_SYNC;
+	}
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+out:
+	return status;
+}
+
+/**
+ * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash
+ * @hw: pointer to hardware structure
+ *
+ * After writing EEPROM to shadow RAM using EEWR register, software calculates
+ * checksum and updates the EEPROM and instructs the hardware to update
+ * the flash.
+ **/
+s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 checksum;
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+
+	if (status != 0)
+		hw_dbg(hw, "EEPROM read failed\n");
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+	    0) {
+		checksum = hw->eeprom.ops.calc_checksum(hw);
+
+		/*
+		 * Do not use hw->eeprom.ops.write because we do not want to
+		 * take the synchronization semaphores twice here.
+		*/
+		status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM,
+						  checksum);
+
+	if (status == 0)
+		status = ixgbe_update_flash_X540(hw);
+	else
+		status = IXGBE_ERR_SWFW_SYNC;
+	}
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+	return status;
+}
+
+/**
+ *  ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device
+ *  @hw: pointer to hardware structure
+ *
+ *  Set FLUP (bit 23) of the EEC register to instruct Hardware to copy
+ *  EEPROM from shadow RAM to the flash device.
+ **/
+static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw)
+{
+	u32 flup;
+	s32 status = IXGBE_ERR_EEPROM;
+
+	status = ixgbe_poll_flash_update_done_X540(hw);
+	if (status == IXGBE_ERR_EEPROM) {
+		hw_dbg(hw, "Flash update time out\n");
+		goto out;
+	}
+
+	flup = IXGBE_READ_REG(hw, IXGBE_EEC) | IXGBE_EEC_FLUP;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC, flup);
+
+	status = ixgbe_poll_flash_update_done_X540(hw);
+	if (status == 0)
+		hw_dbg(hw, "Flash update complete\n");
+	else
+		hw_dbg(hw, "Flash update time out\n");
+
+	if (hw->revision_id == 0) {
+		flup = IXGBE_READ_REG(hw, IXGBE_EEC);
+
+		if (flup & IXGBE_EEC_SEC1VAL) {
+			flup |= IXGBE_EEC_FLUP;
+			IXGBE_WRITE_REG(hw, IXGBE_EEC, flup);
+		}
+
+		status = ixgbe_poll_flash_update_done_X540(hw);
+		if (status == 0)
+			hw_dbg(hw, "Flash update complete\n");
+		else
+			hw_dbg(hw, "Flash update time out\n");
+	}
+out:
+	return status;
+}
+
+/**
+ *  ixgbe_poll_flash_update_done_X540 - Poll flash update status
+ *  @hw: pointer to hardware structure
+ *
+ *  Polls the FLUDONE (bit 26) of the EEC Register to determine when the
+ *  flash update is done.
+ **/
+static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u32 reg;
+	s32 status = IXGBE_ERR_EEPROM;
+
+	for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_EEC);
+		if (reg & IXGBE_EEC_FLUDONE) {
+			status = 0;
+			break;
+		}
+		udelay(5);
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to acquire
+ *
+ *  Acquires the SWFW semaphore thought the SW_FW_SYNC register for
+ *  the specified function (CSR, PHY0, PHY1, NVM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 5;
+	u32 hwmask = 0;
+	u32 timeout = 200;
+	u32 i;
+	s32 ret_val = 0;
+
+	if (swmask == IXGBE_GSSR_EEP_SM)
+		hwmask = IXGBE_GSSR_FLASH_SM;
+
+	/* SW only mask doesn't have FW bit pair */
+	if (swmask == IXGBE_GSSR_SW_MNG_SM)
+		fwmask = 0;
+
+	for (i = 0; i < timeout; i++) {
+		/*
+		 * SW NVM semaphore bit is used for access to all
+		 * SW_FW_SYNC bits (not just NVM)
+		 */
+		if (ixgbe_get_swfw_sync_semaphore(hw)) {
+			ret_val = IXGBE_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask | hwmask))) {
+			swfw_sync |= swmask;
+			IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
+			ixgbe_release_swfw_sync_semaphore(hw);
+			msleep(5);
+			goto out;
+		} else {
+			/*
+			 * Firmware currently using resource (fwmask), hardware
+			 * currently using resource (hwmask), or other software
+			 * thread currently using resource (swmask)
+			 */
+			ixgbe_release_swfw_sync_semaphore(hw);
+			msleep(5);
+		}
+	}
+
+	/* Failed to get SW only semaphore */
+	if (swmask == IXGBE_GSSR_SW_MNG_SM) {
+		ret_val = IXGBE_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	/* If the resource is not released by the FW/HW the SW can assume that
+	 * the FW/HW malfunctions. In that case the SW should sets the SW bit(s)
+	 * of the requested resource(s) while ignoring the corresponding FW/HW
+	 * bits in the SW_FW_SYNC register.
+	 */
+	swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+	if (swfw_sync & (fwmask | hwmask)) {
+		if (ixgbe_get_swfw_sync_semaphore(hw)) {
+			ret_val = IXGBE_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync |= swmask;
+		IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
+		ixgbe_release_swfw_sync_semaphore(hw);
+		msleep(5);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  ixgbe_release_swfw_sync_X540 - Release SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to release
+ *
+ *  Releases the SWFW semaphore through the SW_FW_SYNC register
+ *  for the specified function (CSR, PHY0, PHY1, EVM, Flash)
+ **/
+void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+
+	ixgbe_get_swfw_sync_semaphore(hw);
+
+	swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+	swfw_sync &= ~swmask;
+	IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync);
+
+	ixgbe_release_swfw_sync_semaphore(hw);
+	msleep(5);
+}
+
+/**
+ *  ixgbe_get_nvm_semaphore - Get hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the hardware semaphores so SW/FW can gain control of shared resources
+ **/
+static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_EEPROM;
+	u32 timeout = 2000;
+	u32 i;
+	u32 swsm;
+
+	/* Get SMBI software semaphore between device drivers first */
+	for (i = 0; i < timeout; i++) {
+		/*
+		 * If the SMBI bit is 0 when we read it, then the bit will be
+		 * set and we have the semaphore
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+		if (!(swsm & IXGBE_SWSM_SMBI)) {
+			status = 0;
+			break;
+		}
+		udelay(50);
+	}
+
+	/* Now get the semaphore between SW/FW through the REGSMP bit */
+	if (status == 0) {
+		for (i = 0; i < timeout; i++) {
+			swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+			if (!(swsm & IXGBE_SWFW_REGSMP))
+				break;
+
+			udelay(50);
+		}
+
+		/*
+		 * Release semaphores and return error if SW NVM semaphore
+		 * was not granted because we don't have access to the EEPROM
+		 */
+		if (i >= timeout) {
+			hw_dbg(hw, "REGSMP Software NVM semaphore not "
+				 "granted.\n");
+			ixgbe_release_swfw_sync_semaphore(hw);
+			status = IXGBE_ERR_EEPROM;
+		}
+	} else {
+		hw_dbg(hw, "Software semaphore SMBI between device drivers "
+			 "not granted.\n");
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_release_nvm_semaphore - Release hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  This function clears hardware semaphore bits.
+ **/
+static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
+{
+	u32 swsm;
+
+	/* Release both semaphores by writing 0 to the bits REGSMP and SMBI */
+
+	swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+	swsm &= ~IXGBE_SWSM_SMBI;
+	IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+
+	swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
+	swsm &= ~IXGBE_SWFW_REGSMP;
+	IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_blink_led_start_X540 - Blink LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to blink
+ *
+ * Devices that implement the version 2 interface:
+ *   X540
+ **/
+s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
+{
+	u32 macc_reg;
+	u32 ledctl_reg;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	/*
+	 * Link should be up in order for the blink bit in the LED control
+	 * register to work. Force link and speed in the MAC if link is down.
+	 * This will be reversed when we stop the blinking.
+	 */
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (link_up == false) {
+		macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
+	}
+	/* Set the LED to LINK_UP + BLINK. */
+	ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
+	ledctl_reg |= IXGBE_LED_BLINK(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to stop blinking
+ *
+ * Devices that implement the version 2 interface:
+ *   X540
+ **/
+s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
+{
+	u32 macc_reg;
+	u32 ledctl_reg;
+
+	/* Restore the LED to its default value. */
+	ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
+	ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
+	ledctl_reg &= ~IXGBE_LED_BLINK(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
+
+	/* Unforce link and speed in the MAC. */
+	macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+	macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS);
+	IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
new file mode 100644
index 00000000..77e8952d
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
@@ -0,0 +1,58 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_X540_H_
+#define _IXGBE_X540_H_
+
+#include "ixgbe_type.h"
+
+s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw,
+				     ixgbe_link_speed *speed, bool *autoneg);
+enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			      bool autoneg, bool link_up_wait_to_complete);
+s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw);
+u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw);
+
+s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw);
+s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words,
+				u16 *data);
+s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words,
+				 u16 *data);
+s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw);
+s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, u16 *checksum_val);
+u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw);
+
+s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask);
+void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask);
+
+s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index);
+#endif /* _IXGBE_X540_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
new file mode 100644
index 00000000..5f2523ed
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
@@ -0,0 +1,1246 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include "kcompat.h"
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+/* From lib/vsprintf.c */
+#include <asm/div64.h>
+
+static int skip_atoi(const char **s)
+{
+	int i=0;
+
+	while (isdigit(**s))
+		i = i*10 + *((*s)++) - '0';
+	return i;
+}
+
+#define _kc_ZEROPAD	1		/* pad with zero */
+#define _kc_SIGN	2		/* unsigned/signed long */
+#define _kc_PLUS	4		/* show plus */
+#define _kc_SPACE	8		/* space if plus */
+#define _kc_LEFT	16		/* left justified */
+#define _kc_SPECIAL	32		/* 0x */
+#define _kc_LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
+{
+	char c,sign,tmp[66];
+	const char *digits;
+	const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+	const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	int i;
+
+	digits = (type & _kc_LARGE) ? large_digits : small_digits;
+	if (type & _kc_LEFT)
+		type &= ~_kc_ZEROPAD;
+	if (base < 2 || base > 36)
+		return 0;
+	c = (type & _kc_ZEROPAD) ? '0' : ' ';
+	sign = 0;
+	if (type & _kc_SIGN) {
+		if (num < 0) {
+			sign = '-';
+			num = -num;
+			size--;
+		} else if (type & _kc_PLUS) {
+			sign = '+';
+			size--;
+		} else if (type & _kc_SPACE) {
+			sign = ' ';
+			size--;
+		}
+	}
+	if (type & _kc_SPECIAL) {
+		if (base == 16)
+			size -= 2;
+		else if (base == 8)
+			size--;
+	}
+	i = 0;
+	if (num == 0)
+		tmp[i++]='0';
+	else while (num != 0)
+		tmp[i++] = digits[do_div(num,base)];
+	if (i > precision)
+		precision = i;
+	size -= precision;
+	if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
+		while(size-->0) {
+			if (buf <= end)
+				*buf = ' ';
+			++buf;
+		}
+	}
+	if (sign) {
+		if (buf <= end)
+			*buf = sign;
+		++buf;
+	}
+	if (type & _kc_SPECIAL) {
+		if (base==8) {
+			if (buf <= end)
+				*buf = '0';
+			++buf;
+		} else if (base==16) {
+			if (buf <= end)
+				*buf = '0';
+			++buf;
+			if (buf <= end)
+				*buf = digits[33];
+			++buf;
+		}
+	}
+	if (!(type & _kc_LEFT)) {
+		while (size-- > 0) {
+			if (buf <= end)
+				*buf = c;
+			++buf;
+		}
+	}
+	while (i < precision--) {
+		if (buf <= end)
+			*buf = '0';
+		++buf;
+	}
+	while (i-- > 0) {
+		if (buf <= end)
+			*buf = tmp[i];
+		++buf;
+	}
+	while (size-- > 0) {
+		if (buf <= end)
+			*buf = ' ';
+		++buf;
+	}
+	return buf;
+}
+
+int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+	int len;
+	unsigned long long num;
+	int i, base;
+	char *str, *end, c;
+	const char *s;
+
+	int flags;		/* flags to number() */
+
+	int field_width;	/* width of output field */
+	int precision;		/* min. # of digits for integers; max
+				   number of chars for from string */
+	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
+				/* 'z' support added 23/7/1999 S.H.    */
+				/* 'z' changed to 'Z' --davidm 1/25/99 */
+
+	str = buf;
+	end = buf + size - 1;
+
+	if (end < buf - 1) {
+		end = ((void *) -1);
+		size = end - buf + 1;
+	}
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			if (str <= end)
+				*str = *fmt;
+			++str;
+			continue;
+		}
+
+		/* process flags */
+		flags = 0;
+		repeat:
+			++fmt;		/* this also skips first '%' */
+			switch (*fmt) {
+				case '-': flags |= _kc_LEFT; goto repeat;
+				case '+': flags |= _kc_PLUS; goto repeat;
+				case ' ': flags |= _kc_SPACE; goto repeat;
+				case '#': flags |= _kc_SPECIAL; goto repeat;
+				case '0': flags |= _kc_ZEROPAD; goto repeat;
+			}
+
+		/* get field width */
+		field_width = -1;
+		if (isdigit(*fmt))
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = va_arg(args, int);
+			if (field_width < 0) {
+				field_width = -field_width;
+				flags |= _kc_LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;
+		if (*fmt == '.') {
+			++fmt;
+			if (isdigit(*fmt))
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = va_arg(args, int);
+			}
+			if (precision < 0)
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+			qualifier = *fmt;
+			++fmt;
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+			case 'c':
+				if (!(flags & _kc_LEFT)) {
+					while (--field_width > 0) {
+						if (str <= end)
+							*str = ' ';
+						++str;
+					}
+				}
+				c = (unsigned char) va_arg(args, int);
+				if (str <= end)
+					*str = c;
+				++str;
+				while (--field_width > 0) {
+					if (str <= end)
+						*str = ' ';
+					++str;
+				}
+				continue;
+
+			case 's':
+				s = va_arg(args, char *);
+				if (!s)
+					s = "<NULL>";
+
+				len = strnlen(s, precision);
+
+				if (!(flags & _kc_LEFT)) {
+					while (len < field_width--) {
+						if (str <= end)
+							*str = ' ';
+						++str;
+					}
+				}
+				for (i = 0; i < len; ++i) {
+					if (str <= end)
+						*str = *s;
+					++str; ++s;
+				}
+				while (len < field_width--) {
+					if (str <= end)
+						*str = ' ';
+					++str;
+				}
+				continue;
+
+			case 'p':
+				if (field_width == -1) {
+					field_width = 2*sizeof(void *);
+					flags |= _kc_ZEROPAD;
+				}
+				str = number(str, end,
+						(unsigned long) va_arg(args, void *),
+						16, field_width, precision, flags);
+				continue;
+
+
+			case 'n':
+				/* FIXME:
+				* What does C99 say about the overflow case here? */
+				if (qualifier == 'l') {
+					long * ip = va_arg(args, long *);
+					*ip = (str - buf);
+				} else if (qualifier == 'Z') {
+					size_t * ip = va_arg(args, size_t *);
+					*ip = (str - buf);
+				} else {
+					int * ip = va_arg(args, int *);
+					*ip = (str - buf);
+				}
+				continue;
+
+			case '%':
+				if (str <= end)
+					*str = '%';
+				++str;
+				continue;
+
+				/* integer number formats - set up the flags and "break" */
+			case 'o':
+				base = 8;
+				break;
+
+			case 'X':
+				flags |= _kc_LARGE;
+			case 'x':
+				base = 16;
+				break;
+
+			case 'd':
+			case 'i':
+				flags |= _kc_SIGN;
+			case 'u':
+				break;
+
+			default:
+				if (str <= end)
+					*str = '%';
+				++str;
+				if (*fmt) {
+					if (str <= end)
+						*str = *fmt;
+					++str;
+				} else {
+					--fmt;
+				}
+				continue;
+		}
+		if (qualifier == 'L')
+			num = va_arg(args, long long);
+		else if (qualifier == 'l') {
+			num = va_arg(args, unsigned long);
+			if (flags & _kc_SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'Z') {
+			num = va_arg(args, size_t);
+		} else if (qualifier == 'h') {
+			num = (unsigned short) va_arg(args, int);
+			if (flags & _kc_SIGN)
+				num = (signed short) num;
+		} else {
+			num = va_arg(args, unsigned int);
+			if (flags & _kc_SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, end, num, base,
+				field_width, precision, flags);
+	}
+	if (str <= end)
+		*str = '\0';
+	else if (size > 0)
+		/* don't write out a null byte if the buf size is zero */
+		*end = '\0';
+	/* the trailing null byte doesn't count towards the total
+	* ++str;
+	*/
+	return str-buf;
+}
+
+int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i = _kc_vsnprintf(buf,size,fmt,args);
+	va_end(args);
+	return i;
+}
+#endif /* < 2.4.8 */
+
+
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+        unsigned int vfs_assigned = 0;
+#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
+        int pos;
+        struct pci_dev *vfdev;
+        unsigned short dev_id;
+
+        /* only search if we are a PF */
+        if (!dev->is_physfn)
+                return 0;
+
+        /* find SR-IOV capability */
+        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+        if (!pos)
+                return 0;
+
+        /*
+ *          * determine the device ID for the VFs, the vendor ID will be the
+ *                   * same as the PF so there is no need to check for that one
+ *                            */
+        pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
+
+        /* loop through all the VFs to see if we own any that are assigned */
+       vfdev = pci_get_device(dev->vendor, dev_id, NULL);
+        while (vfdev) {
+                /*
+ *                  * It is considered assigned if it is a virtual function with
+ *                                   * our dev as the physical function and the assigned bit is set
+ *                                                    */
+               if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
+                   (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
+                       vfs_assigned++;
+
+               vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
+       }
+
+#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
+        return vfs_assigned;
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* 3.10.0 */
+
+
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#if defined(CONFIG_HIGHMEM)
+
+#ifndef PCI_DRAM_OFFSET
+#define PCI_DRAM_OFFSET 0
+#endif
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+                 size_t size, int direction)
+{
+	return ((u64) (page - mem_map) << PAGE_SHIFT) + offset +
+		PCI_DRAM_OFFSET;
+}
+
+#else /* CONFIG_HIGHMEM */
+
+u64
+_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
+                 size_t size, int direction)
+{
+	return pci_map_single(dev, (void *)page_address(page) + offset, size,
+			      direction);
+}
+
+#endif /* CONFIG_HIGHMEM */
+
+void
+_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
+                   int direction)
+{
+	return pci_unmap_single(dev, dma_addr, size, direction);
+}
+
+#endif /* 2.4.13 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+int
+_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
+{
+	if (!pci_dma_supported(dev, mask))
+		return -EIO;
+	dev->dma_mask = mask;
+	return 0;
+}
+
+int
+_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
+{
+	int i;
+
+	for (i = 0; i < 6; i++) {
+		if (pci_resource_len(dev, i) == 0)
+			continue;
+
+		if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
+			if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+				pci_release_regions(dev);
+				return -EBUSY;
+			}
+		} else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
+			if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
+				pci_release_regions(dev);
+				return -EBUSY;
+			}
+		}
+	}
+	return 0;
+}
+
+void
+_kc_pci_release_regions(struct pci_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < 6; i++) {
+		if (pci_resource_len(dev, i) == 0)
+			continue;
+
+		if (pci_resource_flags(dev, i) & IORESOURCE_IO)
+			release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+
+		else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
+			release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
+	}
+}
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+struct net_device *
+_kc_alloc_etherdev(int sizeof_priv)
+{
+	struct net_device *dev;
+	int alloc_size;
+
+	alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
+	dev = kzalloc(alloc_size, GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	if (sizeof_priv)
+		dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
+	dev->name[0] = '\0';
+	ether_setup(dev);
+
+	return dev;
+}
+
+int
+_kc_is_valid_ether_addr(u8 *addr)
+{
+	const char zaddr[6] = { 0, };
+
+	return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
+}
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+int
+_kc_pci_set_power_state(struct pci_dev *dev, int state)
+{
+	return 0;
+}
+
+int
+_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
+{
+	return 0;
+}
+
+#endif /* 2.4.6 => 2.4.3 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
+                            int off, int size)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+	frag->page = page;
+	frag->page_offset = off;
+	frag->size = size;
+	skb_shinfo(skb)->nr_frags = i + 1;
+}
+
+/*
+ * Original Copyright:
+ * find_next_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+                            unsigned long offset)
+{
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG-1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= (~0UL << offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp &= (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + ffs(tmp);
+}
+
+size_t _kc_strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t ret = strlen(src);
+
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memcpy(dest, src, len);
+		dest[len] = '\0';
+	}
+	return ret;
+}
+
+#endif /* 2.6.0 => 2.4.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i = vsnprintf(buf, size, fmt, args);
+	va_end(args);
+	return (i >= size) ? (size - 1) : i;
+}
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+char *_kc_kstrdup(const char *s, unsigned int gfp)
+{
+	size_t len;
+	char *buf;
+
+	if (!s)
+		return NULL;
+
+	len = strlen(s) + 1;
+	buf = kmalloc(len, gfp);
+	if (buf)
+		memcpy(buf, s, len);
+	return buf;
+}
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+void *_kc_kzalloc(size_t size, int flags)
+{
+	void *ret = kmalloc(size, flags);
+	if (ret)
+		memset(ret, 0, size);
+	return ret;
+}
+#endif /* <= 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+int _kc_skb_pad(struct sk_buff *skb, int pad)
+{
+	int ntail;
+
+        /* If the skbuff is non linear tailroom is always zero.. */
+        if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
+		memset(skb->data+skb->len, 0, pad);
+		return 0;
+        }
+
+	ntail = skb->data_len + pad - (skb->end - skb->tail);
+	if (likely(skb_cloned(skb) || ntail > 0)) {
+		if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC));
+			goto free_skb;
+	}
+
+#ifdef MAX_SKB_FRAGS
+	if (skb_is_nonlinear(skb) &&
+	    !__pskb_pull_tail(skb, skb->data_len))
+		goto free_skb;
+
+#endif
+	memset(skb->data + skb->len, 0, pad);
+        return 0;
+
+free_skb:
+	kfree_skb(skb);
+	return -ENOMEM;
+}
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+int _kc_pci_save_state(struct pci_dev *pdev)
+{
+	struct adapter_struct *adapter = pci_get_drvdata(pdev);
+	int size = PCI_CONFIG_SPACE_LEN, i;
+	u16 pcie_cap_offset, pcie_link_status;
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+	/* no ->dev for 2.4 kernels */
+	WARN_ON(pdev->dev.driver_data == NULL);
+#endif
+	pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (pcie_cap_offset) {
+		if (!pci_read_config_word(pdev,
+		                          pcie_cap_offset + PCIE_LINK_STATUS,
+		                          &pcie_link_status))
+		size = PCIE_CONFIG_SPACE_LEN;
+	}
+	pci_config_space_ich8lan();
+#ifdef HAVE_PCI_ERS
+	if (adapter->config_space == NULL)
+#else
+	WARN_ON(adapter->config_space != NULL);
+#endif
+		adapter->config_space = kmalloc(size, GFP_KERNEL);
+	if (!adapter->config_space) {
+		printk(KERN_ERR "Out of memory in pci_save_state\n");
+		return -ENOMEM;
+	}
+	for (i = 0; i < (size / 4); i++)
+		pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
+	return 0;
+}
+
+void _kc_pci_restore_state(struct pci_dev *pdev)
+{
+	struct adapter_struct *adapter = pci_get_drvdata(pdev);
+	int size = PCI_CONFIG_SPACE_LEN, i;
+	u16 pcie_cap_offset;
+	u16 pcie_link_status;
+
+	if (adapter->config_space != NULL) {
+		pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+		if (pcie_cap_offset &&
+		    !pci_read_config_word(pdev,
+		                          pcie_cap_offset + PCIE_LINK_STATUS,
+		                          &pcie_link_status))
+			size = PCIE_CONFIG_SPACE_LEN;
+
+		pci_config_space_ich8lan();
+		for (i = 0; i < (size / 4); i++)
+		pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
+#ifndef HAVE_PCI_ERS
+		kfree(adapter->config_space);
+		adapter->config_space = NULL;
+#endif
+	}
+}
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+void _kc_free_netdev(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+
+	if (adapter->config_space != NULL)
+		kfree(adapter->config_space);
+#ifdef CONFIG_SYSFS
+	if (netdev->reg_state == NETREG_UNINITIALIZED) {
+		kfree((char *)netdev - netdev->padded);
+	} else {
+		BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
+		netdev->reg_state = NETREG_RELEASED;
+		class_device_put(&netdev->class_dev);
+	}
+#else
+	kfree((char *)netdev - netdev->padded);
+#endif
+}
+#endif
+
+void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
+{
+	void *p;
+
+	p = kzalloc(len, gfp);
+	if (p)
+		memcpy(p, src, len);
+	return p;
+}
+#endif /* <= 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+/* hexdump code taken from lib/hexdump.c */
+static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
+			int groupsize, unsigned char *linebuf,
+			size_t linebuflen, bool ascii)
+{
+	const u8 *ptr = buf;
+	u8 ch;
+	int j, lx = 0;
+	int ascii_column;
+
+	if (rowsize != 16 && rowsize != 32)
+		rowsize = 16;
+
+	if (!len)
+		goto nil;
+	if (len > rowsize)		/* limit to one line at a time */
+		len = rowsize;
+	if ((len % groupsize) != 0)	/* no mixed size output */
+		groupsize = 1;
+
+	switch (groupsize) {
+	case 8: {
+		const u64 *ptr8 = buf;
+		int ngroups = len / groupsize;
+
+		for (j = 0; j < ngroups; j++)
+			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+				"%s%16.16llx", j ? " " : "",
+				(unsigned long long)*(ptr8 + j));
+		ascii_column = 17 * ngroups + 2;
+		break;
+	}
+
+	case 4: {
+		const u32 *ptr4 = buf;
+		int ngroups = len / groupsize;
+
+		for (j = 0; j < ngroups; j++)
+			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+				"%s%8.8x", j ? " " : "", *(ptr4 + j));
+		ascii_column = 9 * ngroups + 2;
+		break;
+	}
+
+	case 2: {
+		const u16 *ptr2 = buf;
+		int ngroups = len / groupsize;
+
+		for (j = 0; j < ngroups; j++)
+			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
+				"%s%4.4x", j ? " " : "", *(ptr2 + j));
+		ascii_column = 5 * ngroups + 2;
+		break;
+	}
+
+	default:
+		for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
+			ch = ptr[j];
+			linebuf[lx++] = hex_asc(ch >> 4);
+			linebuf[lx++] = hex_asc(ch & 0x0f);
+			linebuf[lx++] = ' ';
+		}
+		if (j)
+			lx--;
+
+		ascii_column = 3 * rowsize + 2;
+		break;
+	}
+	if (!ascii)
+		goto nil;
+
+	while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
+		linebuf[lx++] = ' ';
+	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
+		linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
+				: '.';
+nil:
+	linebuf[lx++] = '\0';
+}
+
+void _kc_print_hex_dump(const char *level,
+			const char *prefix_str, int prefix_type,
+			int rowsize, int groupsize,
+			const void *buf, size_t len, bool ascii)
+{
+	const u8 *ptr = buf;
+	int i, linelen, remaining = len;
+	unsigned char linebuf[200];
+
+	if (rowsize != 16 && rowsize != 32)
+		rowsize = 16;
+
+	for (i = 0; i < len; i += rowsize) {
+		linelen = min(remaining, rowsize);
+		remaining -= rowsize;
+		_kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+				linebuf, sizeof(linebuf), ascii);
+
+		switch (prefix_type) {
+		case DUMP_PREFIX_ADDRESS:
+			printk("%s%s%*p: %s\n", level, prefix_str,
+				(int)(2 * sizeof(void *)), ptr + i, linebuf);
+			break;
+		case DUMP_PREFIX_OFFSET:
+			printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
+			break;
+		default:
+			printk("%s%s%s\n", level, prefix_str, linebuf);
+			break;
+		}
+	}
+}
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
+int ixgbe_dcb_netlink_register(void)
+{
+	return 0;
+}
+
+int ixgbe_dcb_netlink_unregister(void)
+{
+	return 0;
+}
+
+int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max)
+{
+	return 0;
+}
+#endif /* < 2.6.23 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifdef NAPI
+struct net_device *napi_to_poll_dev(struct napi_struct *napi)
+{
+	struct adapter_q_vector *q_vector = container_of(napi,
+	                                                struct adapter_q_vector,
+	                                                napi);
+	return &q_vector->poll_dev;
+}
+
+int __kc_adapter_clean(struct net_device *netdev, int *budget)
+{
+	int work_done;
+	int work_to_do = min(*budget, netdev->quota);
+	/* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
+	struct napi_struct *napi = netdev->priv;
+	work_done = napi->poll(napi, work_to_do);
+	*budget -= work_done;
+	netdev->quota -= work_done;
+	return (work_done >= work_to_do) ? 1 : 0;
+}
+#endif /* NAPI */
+#endif /* <= 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
+{
+	struct pci_dev *parent = pdev->bus->self;
+	u16 link_state;
+	int pos;
+
+	if (!parent)
+		return;
+
+	pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
+	if (pos) {
+		pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
+		link_state &= ~state;
+		pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
+	}
+}
+#endif /* < 2.6.26 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+#ifdef HAVE_TX_MQ
+void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int i;
+
+	netif_stop_queue(netdev);
+	if (netif_is_multiqueue(netdev))
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			netif_stop_subqueue(netdev, i);
+}
+void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int i;
+
+	netif_wake_queue(netdev);
+	if (netif_is_multiqueue(netdev))
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			netif_wake_subqueue(netdev, i);
+}
+void _kc_netif_tx_start_all_queues(struct net_device *netdev)
+{
+	struct adapter_struct *adapter = netdev_priv(netdev);
+	int i;
+
+	netif_start_queue(netdev);
+	if (netif_is_multiqueue(netdev))
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			netif_start_subqueue(netdev, i);
+}
+#endif /* HAVE_TX_MQ */
+
+#ifndef __WARN_printf
+void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+	va_list args;
+
+	printk(KERN_WARNING "------------[ cut here ]------------\n");
+	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+
+	dump_stack();
+}
+#endif /* __WARN_printf */
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+
+int
+_kc_pci_prepare_to_sleep(struct pci_dev *dev)
+{
+	pci_power_t target_state;
+	int error;
+
+	target_state = pci_choose_state(dev, PMSG_SUSPEND);
+
+	pci_enable_wake(dev, target_state, true);
+
+	error = pci_set_power_state(dev, target_state);
+
+	if (error)
+		pci_enable_wake(dev, target_state, false);
+
+	return error;
+}
+
+int
+_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
+{
+	int err;
+
+	err = pci_enable_wake(dev, PCI_D3cold, enable);
+	if (err)
+		goto out;
+
+	err = pci_enable_wake(dev, PCI_D3hot, enable);
+
+out:
+	return err;
+}
+#endif /* < 2.6.28 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
+			 int off, int size)
+{
+	skb_fill_page_desc(skb, i, page, off, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += size;
+}
+#endif /* < 3.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
+#ifdef HAVE_NETDEV_SELECT_QUEUE
+#include <net/ip.h>
+static u32 _kc_simple_tx_hashrnd;
+static u32 _kc_simple_tx_hashrnd_initialized;
+
+u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+	u32 addr1, addr2, ports;
+	u32 hash, ihl;
+	u8 ip_proto = 0;
+
+	if (unlikely(!_kc_simple_tx_hashrnd_initialized)) {
+		get_random_bytes(&_kc_simple_tx_hashrnd, 4);
+		_kc_simple_tx_hashrnd_initialized = 1;
+	}
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+			ip_proto = ip_hdr(skb)->protocol;
+		addr1 = ip_hdr(skb)->saddr;
+		addr2 = ip_hdr(skb)->daddr;
+		ihl = ip_hdr(skb)->ihl;
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6):
+		ip_proto = ipv6_hdr(skb)->nexthdr;
+		addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
+		addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+#endif
+	default:
+		return 0;
+	}
+
+
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
+		break;
+
+	default:
+		ports = 0;
+		break;
+	}
+
+	hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd);
+
+	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+}
+#endif /* HAVE_NETDEV_SELECT_QUEUE */
+#endif /* < 2.6.30 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+#ifdef HAVE_TX_MQ
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+	struct Qdisc *qdisc;
+	int i;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if ( txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		for (i = txq; i < dev->num_tx_queues; i++) {
+			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+			if (qdisc) {
+				spin_lock_bh(qdisc_lock(qdisc));
+				qdisc_reset(qdisc);
+				spin_unlock_bh(qdisc_lock(qdisc));
+			}
+		}
+	}
+}
+#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
+#endif /* HAVE_TX_MQ */
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+static const u32 _kc_flags_dup_features =
+	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+
+u32 _kc_ethtool_op_get_flags(struct net_device *dev)
+{
+	return dev->features & _kc_flags_dup_features;
+}
+
+int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
+{
+	if (data & ~supported)
+		return -EINVAL;
+
+	dev->features = ((dev->features & ~_kc_flags_dup_features) |
+			 (data & _kc_flags_dup_features));
+	return 0;
+}
+#endif /* < 2.6.36 */
+
+/******************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
+u8 _kc_netdev_get_num_tc(struct net_device *dev)
+{
+	struct adapter_struct *kc_adapter = netdev_priv(dev);
+	if (kc_adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		return kc_adapter->tc;
+	else
+		return 0;
+}
+
+u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up)
+{
+	struct adapter_struct *kc_adapter = netdev_priv(dev);
+	int tc;
+	u8 map;
+
+	for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) {
+		map = kc_adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap;
+
+		if (map & (1 << up))
+			return tc;
+	}
+
+	return 0;
+}
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
+#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
new file mode 100644
index 00000000..bf27579b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
@@ -0,0 +1,3143 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2012 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _KCOMPAT_H_
+#define _KCOMPAT_H_
+
+#ifndef LINUX_VERSION_CODE
+#include <linux/version.h>
+#else
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+#endif
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/mii.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+/* NAPI enable/disable flags here */
+/* enable NAPI for ixgbe by default */
+#undef CONFIG_IXGBE_NAPI
+#define CONFIG_IXGBE_NAPI
+#define NAPI
+#ifdef CONFIG_IXGBE_NAPI
+#undef NAPI
+#define NAPI
+#endif /* CONFIG_IXGBE_NAPI */
+#ifdef IXGBE_NAPI
+#undef NAPI
+#define NAPI
+#endif /* IXGBE_NAPI */
+#ifdef IXGBE_NO_NAPI
+#undef NAPI
+#endif /* IXGBE_NO_NAPI */
+
+#define adapter_struct ixgbe_adapter
+#define adapter_q_vector ixgbe_q_vector
+
+/* and finally set defines so that the code sees the changes */
+#ifdef NAPI
+#ifndef CONFIG_IXGBE_NAPI
+#define CONFIG_IXGBE_NAPI
+#endif
+#else
+#undef CONFIG_IXGBE_NAPI
+#endif /* NAPI */
+
+/* packet split disable/enable */
+#ifdef DISABLE_PACKET_SPLIT
+#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+#define CONFIG_IXGBE_DISABLE_PACKET_SPLIT
+#endif
+#endif /* DISABLE_PACKET_SPLIT */
+
+/* MSI compatibility code for all kernels and drivers */
+#ifdef DISABLE_PCI_MSI
+#undef CONFIG_PCI_MSI
+#endif
+#ifndef CONFIG_PCI_MSI
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+struct msix_entry {
+	u16 vector; /* kernel uses to write allocated vector */
+	u16 entry;  /* driver uses to specify entry, OS writes */
+};
+#endif
+#undef pci_enable_msi
+#define pci_enable_msi(a) -ENOTSUPP
+#undef pci_disable_msi
+#define pci_disable_msi(a) do {} while (0)
+#undef pci_enable_msix
+#define pci_enable_msix(a, b, c) -ENOTSUPP
+#undef pci_disable_msix
+#define pci_disable_msix(a) do {} while (0)
+#define msi_remove_pci_irq_vectors(a) do {} while (0)
+#endif /* CONFIG_PCI_MSI */
+#ifdef DISABLE_PM
+#undef CONFIG_PM
+#endif
+
+#ifdef DISABLE_NET_POLL_CONTROLLER
+#undef CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef PMSG_SUSPEND
+#define PMSG_SUSPEND 3
+#endif
+
+/* generic boolean compatibility */
+#undef TRUE
+#undef FALSE
+#define TRUE true
+#define FALSE false
+#ifdef GCC_VERSION
+#if ( GCC_VERSION < 3000 )
+#define _Bool char
+#endif
+#else
+#define _Bool char
+#endif
+
+/* kernels less than 2.4.14 don't have this */
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef module_param
+#define module_param(v,t,p) MODULE_PARM(v, "i");
+#endif
+
+#ifndef DMA_64BIT_MASK
+#define DMA_64BIT_MASK  0xffffffffffffffffULL
+#endif
+
+#ifndef DMA_32BIT_MASK
+#define DMA_32BIT_MASK  0x00000000ffffffffULL
+#endif
+
+#ifndef PCI_CAP_ID_EXP
+#define PCI_CAP_ID_EXP 0x10
+#endif
+
+#ifndef PCIE_LINK_STATE_L0S
+#define PCIE_LINK_STATE_L0S 1
+#endif
+#ifndef PCIE_LINK_STATE_L1
+#define PCIE_LINK_STATE_L1 2
+#endif
+
+#ifndef mmiowb
+#ifdef CONFIG_IA64
+#define mmiowb() asm volatile ("mf.a" ::: "memory")
+#else
+#define mmiowb()
+#endif
+#endif
+
+#ifndef SET_NETDEV_DEV
+#define SET_NETDEV_DEV(net, pdev)
+#endif
+
+#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#define free_netdev(x)	kfree(x)
+#endif
+
+#ifdef HAVE_POLL_CONTROLLER
+#define CONFIG_NET_POLL_CONTROLLER
+#endif
+
+#ifndef SKB_DATAREF_SHIFT
+/* if we do not have the infrastructure to detect if skb_header is cloned
+   just return false in all cases */
+#define skb_header_cloned(x) 0
+#endif
+
+#ifndef NETIF_F_GSO
+#define gso_size tso_size
+#define gso_segs tso_segs
+#endif
+
+#ifndef NETIF_F_GRO
+#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \
+		vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan)
+#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb)
+#endif
+
+#ifndef NETIF_F_SCTP_CSUM
+#define NETIF_F_SCTP_CSUM 0
+#endif
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO (1 << 15)
+#endif
+
+#ifndef NETIF_F_NTUPLE
+#define NETIF_F_NTUPLE (1 << 27)
+#endif
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+
+#ifndef CHECKSUM_PARTIAL
+#define CHECKSUM_PARTIAL CHECKSUM_HW
+#define CHECKSUM_COMPLETE CHECKSUM_HW
+#endif
+
+#ifndef __read_mostly
+#define __read_mostly
+#endif
+
+#ifndef MII_RESV1
+#define MII_RESV1		0x17		/* Reserved...		*/
+#endif
+
+#ifndef unlikely
+#define unlikely(_x) _x
+#define likely(_x) _x
+#endif
+
+#ifndef WARN_ON
+#define WARN_ON(x)
+#endif
+
+#ifndef PCI_DEVICE
+#define PCI_DEVICE(vend,dev) \
+	.vendor = (vend), .device = (dev), \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
+#endif
+
+#ifndef node_online
+#define node_online(node) ((node) == 0)
+#endif
+
+#ifndef num_online_cpus
+#define num_online_cpus() smp_num_cpus
+#endif
+
+#ifndef cpu_online
+#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map)
+#endif
+
+#ifndef _LINUX_RANDOM_H
+#include <linux/random.h>
+#endif
+
+#ifndef DECLARE_BITMAP
+#ifndef BITS_TO_LONGS
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#endif
+#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)]
+#endif
+
+#ifndef VLAN_HLEN
+#define VLAN_HLEN 4
+#endif
+
+#ifndef VLAN_ETH_HLEN
+#define VLAN_ETH_HLEN 18
+#endif
+
+#ifndef VLAN_ETH_FRAME_LEN
+#define VLAN_ETH_FRAME_LEN 1518
+#endif
+
+#if !defined(IXGBE_DCA) && !defined(IGB_DCA)
+#define dca_get_tag(b) 0
+#define dca_add_requester(a) -1
+#define dca_remove_requester(b) do { } while(0)
+#define DCA_PROVIDER_ADD     0x0001
+#define DCA_PROVIDER_REMOVE  0x0002
+#endif
+
+#ifndef DCA_GET_TAG_TWO_ARGS
+#define dca3_get_tag(a,b) dca_get_tag(b)
+#endif
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#if defined(__i386__) || defined(__x86_64__)
+#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#endif
+#endif
+
+/* taken from 2.6.24 definition in linux/kernel.h */
+#ifndef IS_ALIGNED
+#define IS_ALIGNED(x,a)         (((x) % ((typeof(x))(a))) == 0)
+#endif
+
+#ifndef NETIF_F_HW_VLAN_TX
+struct _kc_vlan_ethhdr {
+	unsigned char	h_dest[ETH_ALEN];
+	unsigned char	h_source[ETH_ALEN];
+	__be16		h_vlan_proto;
+	__be16		h_vlan_TCI;
+	__be16		h_vlan_encapsulated_proto;
+};
+#define vlan_ethhdr _kc_vlan_ethhdr
+struct _kc_vlan_hdr {
+	__be16		h_vlan_TCI;
+	__be16		h_vlan_encapsulated_proto;
+};
+#define vlan_hdr _kc_vlan_hdr
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#define vlan_tx_tag_present(_skb) 0
+#define vlan_tx_tag_get(_skb) 0
+#endif
+#endif
+
+#ifndef VLAN_PRIO_SHIFT
+#define VLAN_PRIO_SHIFT 13
+#endif
+
+
+#ifndef __GFP_COLD
+#define __GFP_COLD 0
+#endif
+
+/*****************************************************************************/
+/* Installations with ethtool version without eeprom, adapter id, or statistics
+ * support */
+
+#ifndef ETH_GSTRING_LEN
+#define ETH_GSTRING_LEN 32
+#endif
+
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS 0x1d
+#undef ethtool_drvinfo
+#define ethtool_drvinfo k_ethtool_drvinfo
+struct k_ethtool_drvinfo {
+	u32 cmd;
+	char driver[32];
+	char version[32];
+	char fw_version[32];
+	char bus_info[32];
+	char reserved1[32];
+	char reserved2[16];
+	u32 n_stats;
+	u32 testinfo_len;
+	u32 eedump_len;
+	u32 regdump_len;
+};
+
+struct ethtool_stats {
+	u32 cmd;
+	u32 n_stats;
+	u64 data[0];
+};
+#endif /* ETHTOOL_GSTATS */
+
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID 0x1c
+#endif /* ETHTOOL_PHYS_ID */
+
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS 0x1b
+enum ethtool_stringset {
+	ETH_SS_TEST             = 0,
+	ETH_SS_STATS,
+};
+struct ethtool_gstrings {
+	u32 cmd;            /* ETHTOOL_GSTRINGS */
+	u32 string_set;     /* string set id e.c. ETH_SS_TEST, etc*/
+	u32 len;            /* number of strings in the string set */
+	u8 data[0];
+};
+#endif /* ETHTOOL_GSTRINGS */
+
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST 0x1a
+enum ethtool_test_flags {
+	ETH_TEST_FL_OFFLINE	= (1 << 0),
+	ETH_TEST_FL_FAILED	= (1 << 1),
+};
+struct ethtool_test {
+	u32 cmd;
+	u32 flags;
+	u32 reserved;
+	u32 len;
+	u64 data[0];
+};
+#endif /* ETHTOOL_TEST */
+
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM 0xb
+#undef ETHTOOL_GREGS
+struct ethtool_eeprom {
+	u32 cmd;
+	u32 magic;
+	u32 offset;
+	u32 len;
+	u8 data[0];
+};
+
+struct ethtool_value {
+	u32 cmd;
+	u32 data;
+};
+#endif /* ETHTOOL_GEEPROM */
+
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK 0xa
+#endif /* ETHTOOL_GLINK */
+
+#ifndef ETHTOOL_GWOL
+#define ETHTOOL_GWOL 0x5
+#define ETHTOOL_SWOL 0x6
+#define SOPASS_MAX      6
+struct ethtool_wolinfo {
+	u32 cmd;
+	u32 supported;
+	u32 wolopts;
+	u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+#endif /* ETHTOOL_GWOL */
+
+#ifndef ETHTOOL_GREGS
+#define ETHTOOL_GREGS		0x00000004 /* Get NIC registers */
+#define ethtool_regs _kc_ethtool_regs
+/* for passing big chunks of data */
+struct _kc_ethtool_regs {
+	u32 cmd;
+	u32 version; /* driver-specific, indicates different chips/revs */
+	u32 len; /* bytes */
+	u8 data[0];
+};
+#endif /* ETHTOOL_GREGS */
+
+#ifndef ETHTOOL_GMSGLVL
+#define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
+#endif
+#ifndef ETHTOOL_SMSGLVL
+#define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level, priv. */
+#endif
+#ifndef ETHTOOL_NWAY_RST
+#define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation, priv */
+#endif
+#ifndef ETHTOOL_GLINK
+#define ETHTOOL_GLINK		0x0000000a /* Get link status */
+#endif
+#ifndef ETHTOOL_GEEPROM
+#define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
+#endif
+#ifndef ETHTOOL_SEEPROM
+#define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data */
+#endif
+#ifndef ETHTOOL_GCOALESCE
+#define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
+/* for configuring coalescing parameters of chip */
+#define ethtool_coalesce _kc_ethtool_coalesce
+struct _kc_ethtool_coalesce {
+	u32	cmd;	/* ETHTOOL_{G,S}COALESCE */
+
+	/* How many usecs to delay an RX interrupt after
+	 * a packet arrives.  If 0, only rx_max_coalesced_frames
+	 * is used.
+	 */
+	u32	rx_coalesce_usecs;
+
+	/* How many packets to delay an RX interrupt after
+	 * a packet arrives.  If 0, only rx_coalesce_usecs is
+	 * used.  It is illegal to set both usecs and max frames
+	 * to zero as this would cause RX interrupts to never be
+	 * generated.
+	 */
+	u32	rx_max_coalesced_frames;
+
+	/* Same as above two parameters, except that these values
+	 * apply while an IRQ is being serviced by the host.  Not
+	 * all cards support this feature and the values are ignored
+	 * in that case.
+	 */
+	u32	rx_coalesce_usecs_irq;
+	u32	rx_max_coalesced_frames_irq;
+
+	/* How many usecs to delay a TX interrupt after
+	 * a packet is sent.  If 0, only tx_max_coalesced_frames
+	 * is used.
+	 */
+	u32	tx_coalesce_usecs;
+
+	/* How many packets to delay a TX interrupt after
+	 * a packet is sent.  If 0, only tx_coalesce_usecs is
+	 * used.  It is illegal to set both usecs and max frames
+	 * to zero as this would cause TX interrupts to never be
+	 * generated.
+	 */
+	u32	tx_max_coalesced_frames;
+
+	/* Same as above two parameters, except that these values
+	 * apply while an IRQ is being serviced by the host.  Not
+	 * all cards support this feature and the values are ignored
+	 * in that case.
+	 */
+	u32	tx_coalesce_usecs_irq;
+	u32	tx_max_coalesced_frames_irq;
+
+	/* How many usecs to delay in-memory statistics
+	 * block updates.  Some drivers do not have an in-memory
+	 * statistic block, and in such cases this value is ignored.
+	 * This value must not be zero.
+	 */
+	u32	stats_block_coalesce_usecs;
+
+	/* Adaptive RX/TX coalescing is an algorithm implemented by
+	 * some drivers to improve latency under low packet rates and
+	 * improve throughput under high packet rates.  Some drivers
+	 * only implement one of RX or TX adaptive coalescing.  Anything
+	 * not implemented by the driver causes these values to be
+	 * silently ignored.
+	 */
+	u32	use_adaptive_rx_coalesce;
+	u32	use_adaptive_tx_coalesce;
+
+	/* When the packet rate (measured in packets per second)
+	 * is below pkt_rate_low, the {rx,tx}_*_low parameters are
+	 * used.
+	 */
+	u32	pkt_rate_low;
+	u32	rx_coalesce_usecs_low;
+	u32	rx_max_coalesced_frames_low;
+	u32	tx_coalesce_usecs_low;
+	u32	tx_max_coalesced_frames_low;
+
+	/* When the packet rate is below pkt_rate_high but above
+	 * pkt_rate_low (both measured in packets per second) the
+	 * normal {rx,tx}_* coalescing parameters are used.
+	 */
+
+	/* When the packet rate is (measured in packets per second)
+	 * is above pkt_rate_high, the {rx,tx}_*_high parameters are
+	 * used.
+	 */
+	u32	pkt_rate_high;
+	u32	rx_coalesce_usecs_high;
+	u32	rx_max_coalesced_frames_high;
+	u32	tx_coalesce_usecs_high;
+	u32	tx_max_coalesced_frames_high;
+
+	/* How often to do adaptive coalescing packet rate sampling,
+	 * measured in seconds.  Must not be zero.
+	 */
+	u32	rate_sample_interval;
+};
+#endif /* ETHTOOL_GCOALESCE */
+
+#ifndef ETHTOOL_SCOALESCE
+#define ETHTOOL_SCOALESCE	0x0000000f /* Set coalesce config. */
+#endif
+#ifndef ETHTOOL_GRINGPARAM
+#define ETHTOOL_GRINGPARAM	0x00000010 /* Get ring parameters */
+/* for configuring RX/TX ring parameters */
+#define ethtool_ringparam _kc_ethtool_ringparam
+struct _kc_ethtool_ringparam {
+	u32	cmd;	/* ETHTOOL_{G,S}RINGPARAM */
+
+	/* Read only attributes.  These indicate the maximum number
+	 * of pending RX/TX ring entries the driver will allow the
+	 * user to set.
+	 */
+	u32	rx_max_pending;
+	u32	rx_mini_max_pending;
+	u32	rx_jumbo_max_pending;
+	u32	tx_max_pending;
+
+	/* Values changeable by the user.  The valid values are
+	 * in the range 1 to the "*_max_pending" counterpart above.
+	 */
+	u32	rx_pending;
+	u32	rx_mini_pending;
+	u32	rx_jumbo_pending;
+	u32	tx_pending;
+};
+#endif /* ETHTOOL_GRINGPARAM */
+
+#ifndef ETHTOOL_SRINGPARAM
+#define ETHTOOL_SRINGPARAM	0x00000011 /* Set ring parameters, priv. */
+#endif
+#ifndef ETHTOOL_GPAUSEPARAM
+#define ETHTOOL_GPAUSEPARAM	0x00000012 /* Get pause parameters */
+/* for configuring link flow control parameters */
+#define ethtool_pauseparam _kc_ethtool_pauseparam
+struct _kc_ethtool_pauseparam {
+	u32	cmd;	/* ETHTOOL_{G,S}PAUSEPARAM */
+
+	/* If the link is being auto-negotiated (via ethtool_cmd.autoneg
+	 * being true) the user may set 'autoneg' here non-zero to have the
+	 * pause parameters be auto-negotiated too.  In such a case, the
+	 * {rx,tx}_pause values below determine what capabilities are
+	 * advertised.
+	 *
+	 * If 'autoneg' is zero or the link is not being auto-negotiated,
+	 * then {rx,tx}_pause force the driver to use/not-use pause
+	 * flow control.
+	 */
+	u32	autoneg;
+	u32	rx_pause;
+	u32	tx_pause;
+};
+#endif /* ETHTOOL_GPAUSEPARAM */
+
+#ifndef ETHTOOL_SPAUSEPARAM
+#define ETHTOOL_SPAUSEPARAM	0x00000013 /* Set pause parameters. */
+#endif
+#ifndef ETHTOOL_GRXCSUM
+#define ETHTOOL_GRXCSUM		0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SRXCSUM
+#define ETHTOOL_SRXCSUM		0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GTXCSUM
+#define ETHTOOL_GTXCSUM		0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STXCSUM
+#define ETHTOOL_STXCSUM		0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_GSG
+#define ETHTOOL_GSG		0x00000018 /* Get scatter-gather enable
+					    * (ethtool_value) */
+#endif
+#ifndef ETHTOOL_SSG
+#define ETHTOOL_SSG		0x00000019 /* Set scatter-gather enable
+					    * (ethtool_value). */
+#endif
+#ifndef ETHTOOL_TEST
+#define ETHTOOL_TEST		0x0000001a /* execute NIC self-test, priv. */
+#endif
+#ifndef ETHTOOL_GSTRINGS
+#define ETHTOOL_GSTRINGS	0x0000001b /* get specified string set */
+#endif
+#ifndef ETHTOOL_PHYS_ID
+#define ETHTOOL_PHYS_ID		0x0000001c /* identify the NIC */
+#endif
+#ifndef ETHTOOL_GSTATS
+#define ETHTOOL_GSTATS		0x0000001d /* get NIC-specific statistics */
+#endif
+#ifndef ETHTOOL_GTSO
+#define ETHTOOL_GTSO		0x0000001e /* Get TSO enable (ethtool_value) */
+#endif
+#ifndef ETHTOOL_STSO
+#define ETHTOOL_STSO		0x0000001f /* Set TSO enable (ethtool_value) */
+#endif
+
+#ifndef ETHTOOL_BUSINFO_LEN
+#define ETHTOOL_BUSINFO_LEN	32
+#endif
+
+#ifndef RHEL_RELEASE_CODE
+/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */
+#define RHEL_RELEASE_CODE 0
+#endif
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+#ifndef AX_RELEASE_CODE
+#define AX_RELEASE_CODE 0
+#endif
+#ifndef AX_RELEASE_VERSION
+#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b))
+#endif
+
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
+#endif
+#ifndef SLE_VERSION_CODE
+#ifdef CONFIG_SUSE_KERNEL
+/* SLES11 GA is 2.6.27 based */
+#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
+#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#else
+#define SLE_VERSION_CODE 0
+#endif
+#else /* CONFIG_SUSE_KERNEL */
+#define SLE_VERSION_CODE 0
+#endif /* CONFIG_SUSE_KERNEL */
+#endif /* SLE_VERSION_CODE */
+
+#ifdef __KLOCWORK__
+#ifdef ARRAY_SIZE
+#undef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+#endif /* __KLOCWORK__ */
+
+/*****************************************************************************/
+/* 2.4.3 => 2.4.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
+
+/**************************************/
+/* PCI DRIVER API */
+
+#ifndef pci_set_dma_mask
+#define pci_set_dma_mask _kc_pci_set_dma_mask
+extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask);
+#endif
+
+#ifndef pci_request_regions
+#define pci_request_regions _kc_pci_request_regions
+extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name);
+#endif
+
+#ifndef pci_release_regions
+#define pci_release_regions _kc_pci_release_regions
+extern void _kc_pci_release_regions(struct pci_dev *pdev);
+#endif
+
+/**************************************/
+/* NETWORK DRIVER API */
+
+#ifndef alloc_etherdev
+#define alloc_etherdev _kc_alloc_etherdev
+extern struct net_device * _kc_alloc_etherdev(int sizeof_priv);
+#endif
+
+#ifndef is_valid_ether_addr
+#define is_valid_ether_addr _kc_is_valid_ether_addr
+extern int _kc_is_valid_ether_addr(u8 *addr);
+#endif
+
+/**************************************/
+/* MISCELLANEOUS */
+
+#ifndef INIT_TQUEUE
+#define INIT_TQUEUE(_tq, _routine, _data)		\
+	do {						\
+		INIT_LIST_HEAD(&(_tq)->list);		\
+		(_tq)->sync = 0;			\
+		(_tq)->routine = _routine;		\
+		(_tq)->data = _data;			\
+	} while (0)
+#endif
+
+#endif /* 2.4.3 => 2.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) )
+/* Generic MII registers. */
+#define MII_BMCR            0x00        /* Basic mode control register */
+#define MII_BMSR            0x01        /* Basic mode status register  */
+#define MII_PHYSID1         0x02        /* PHYS ID 1                   */
+#define MII_PHYSID2         0x03        /* PHYS ID 2                   */
+#define MII_ADVERTISE       0x04        /* Advertisement control reg   */
+#define MII_LPA             0x05        /* Link partner ability reg    */
+#define MII_EXPANSION       0x06        /* Expansion register          */
+/* Basic mode control register. */
+#define BMCR_FULLDPLX           0x0100  /* Full duplex                 */
+#define BMCR_ANENABLE           0x1000  /* Enable auto negotiation     */
+/* Basic mode status register. */
+#define BMSR_ERCAP              0x0001  /* Ext-reg capability          */
+#define BMSR_ANEGCAPABLE        0x0008  /* Able to do auto-negotiation */
+#define BMSR_10HALF             0x0800  /* Can do 10mbps, half-duplex  */
+#define BMSR_10FULL             0x1000  /* Can do 10mbps, full-duplex  */
+#define BMSR_100HALF            0x2000  /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL            0x4000  /* Can do 100mbps, full-duplex */
+/* Advertisement control register. */
+#define ADVERTISE_CSMA          0x0001  /* Only selector supported     */
+#define ADVERTISE_10HALF        0x0020  /* Try for 10mbps half-duplex  */
+#define ADVERTISE_10FULL        0x0040  /* Try for 10mbps full-duplex  */
+#define ADVERTISE_100HALF       0x0080  /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL       0x0100  /* Try for 100mbps full-duplex */
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+                       ADVERTISE_100HALF | ADVERTISE_100FULL)
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_ENABLENPAGE   0x0004  /* This enables npage words    */
+#endif
+
+/*****************************************************************************/
+/* 2.4.6 => 2.4.3 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
+
+#ifndef pci_set_power_state
+#define pci_set_power_state _kc_pci_set_power_state
+extern int _kc_pci_set_power_state(struct pci_dev *dev, int state);
+#endif
+
+#ifndef pci_enable_wake
+#define pci_enable_wake _kc_pci_enable_wake
+extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable);
+#endif
+
+#ifndef pci_disable_device
+#define pci_disable_device _kc_pci_disable_device
+extern void _kc_pci_disable_device(struct pci_dev *pdev);
+#endif
+
+/* PCI PM entry point syntax changed, so don't support suspend/resume */
+#undef CONFIG_PM
+
+#endif /* 2.4.6 => 2.4.3 */
+
+#ifndef HAVE_PCI_SET_MWI
+#define pci_set_mwi(X) pci_write_config_word(X, \
+			       PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \
+			       PCI_COMMAND_INVALIDATE);
+#define pci_clear_mwi(X) pci_write_config_word(X, \
+			       PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \
+			       ~PCI_COMMAND_INVALIDATE);
+#endif
+
+/*****************************************************************************/
+/* 2.4.10 => 2.4.9 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) )
+
+/**************************************/
+/* MODULE API */
+
+#ifndef MODULE_LICENSE
+	#define MODULE_LICENSE(X)
+#endif
+
+/**************************************/
+/* OTHER */
+
+#undef min
+#define min(x,y) ({ \
+	const typeof(x) _x = (x);	\
+	const typeof(y) _y = (y);	\
+	(void) (&_x == &_y);		\
+	_x < _y ? _x : _y; })
+
+#undef max
+#define max(x,y) ({ \
+	const typeof(x) _x = (x);	\
+	const typeof(y) _y = (y);	\
+	(void) (&_x == &_y);		\
+	_x > _y ? _x : _y; })
+
+#define min_t(type,x,y) ({ \
+	type _x = (x); \
+	type _y = (y); \
+	_x < _y ? _x : _y; })
+
+#define max_t(type,x,y) ({ \
+	type _x = (x); \
+	type _y = (y); \
+	_x > _y ? _x : _y; })
+
+#ifndef list_for_each_safe
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
+extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...);
+#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args)
+extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args)
+#else /* 2.4.8 => 2.4.9 */
+extern int snprintf(char * buf, size_t size, const char *fmt, ...);
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+#endif
+#endif /* 2.4.10 -> 2.4.6 */
+
+
+/*****************************************************************************/
+/* 2.4.12 => 2.4.10 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) )
+#ifndef HAVE_NETIF_MSG
+#define HAVE_NETIF_MSG 1
+enum {
+	NETIF_MSG_DRV		= 0x0001,
+	NETIF_MSG_PROBE		= 0x0002,
+	NETIF_MSG_LINK		= 0x0004,
+	NETIF_MSG_TIMER		= 0x0008,
+	NETIF_MSG_IFDOWN	= 0x0010,
+	NETIF_MSG_IFUP		= 0x0020,
+	NETIF_MSG_RX_ERR	= 0x0040,
+	NETIF_MSG_TX_ERR	= 0x0080,
+	NETIF_MSG_TX_QUEUED	= 0x0100,
+	NETIF_MSG_INTR		= 0x0200,
+	NETIF_MSG_TX_DONE	= 0x0400,
+	NETIF_MSG_RX_STATUS	= 0x0800,
+	NETIF_MSG_PKTDATA	= 0x1000,
+	NETIF_MSG_HW		= 0x2000,
+	NETIF_MSG_WOL		= 0x4000,
+};
+
+#define netif_msg_drv(p)	((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p)	((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p)	((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p)	((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p)	((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p)	((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p)	((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p)	((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p)	((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p)	((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p)	((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p)	((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p)	((p)->msg_enable & NETIF_MSG_PKTDATA)
+#endif /* !HAVE_NETIF_MSG */
+#endif /* 2.4.12 => 2.4.10 */
+
+/*****************************************************************************/
+/* 2.4.13 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
+
+/**************************************/
+/* PCI DMA MAPPING */
+
+#ifndef virt_to_page
+	#define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT))
+#endif
+
+#ifndef pci_map_page
+#define pci_map_page _kc_pci_map_page
+extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction);
+#endif
+
+#ifndef pci_unmap_page
+#define pci_unmap_page _kc_pci_unmap_page
+extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction);
+#endif
+
+/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */
+
+#undef DMA_32BIT_MASK
+#define DMA_32BIT_MASK	0xffffffff
+#undef DMA_64BIT_MASK
+#define DMA_64BIT_MASK	0xffffffff
+
+/**************************************/
+/* OTHER */
+
+#ifndef cpu_relax
+#define cpu_relax()	rep_nop()
+#endif
+
+struct vlan_ethhdr {
+	unsigned char h_dest[ETH_ALEN];
+	unsigned char h_source[ETH_ALEN];
+	unsigned short h_vlan_proto;
+	unsigned short h_vlan_TCI;
+	unsigned short h_vlan_encapsulated_proto;
+};
+#endif /* 2.4.13 => 2.4.12 */
+
+/*****************************************************************************/
+/* 2.4.17 => 2.4.12 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) )
+
+#ifndef __devexit_p
+	#define __devexit_p(x) &(x)
+#endif
+
+#endif /* 2.4.17 => 2.4.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) )
+#define NETIF_MSG_HW	0x2000
+#define NETIF_MSG_WOL	0x4000
+
+#ifndef netif_msg_hw
+#define netif_msg_hw(p)		((p)->msg_enable & NETIF_MSG_HW)
+#endif
+#ifndef netif_msg_wol
+#define netif_msg_wol(p)	((p)->msg_enable & NETIF_MSG_WOL)
+#endif
+#endif /* 2.4.18 */
+
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* 2.4.20 => 2.4.19 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) )
+
+/* we won't support NAPI on less than 2.4.20 */
+#ifdef NAPI
+#undef NAPI
+#undef CONFIG_IXGBE_NAPI
+#endif
+
+#endif /* 2.4.20 => 2.4.19 */
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#define pci_name(x)	((x)->slot_name)
+#endif
+
+/*****************************************************************************/
+/* 2.4.22 => 2.4.17 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) )
+#ifndef IXGBE_NO_LRO
+/* Don't enable LRO for these legacy kernels */
+#define IXGBE_NO_LRO
+#endif
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* 2.4.23 => 2.4.22 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) )
+/*****************************************************************************/
+#ifdef NAPI
+#ifndef netif_poll_disable
+#define netif_poll_disable(x) _kc_netif_poll_disable(x)
+static inline void _kc_netif_poll_disable(struct net_device *netdev)
+{
+	while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) {
+		/* No hurry */
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(1);
+	}
+}
+#endif
+#ifndef netif_poll_enable
+#define netif_poll_enable(x) _kc_netif_poll_enable(x)
+static inline void _kc_netif_poll_enable(struct net_device *netdev)
+{
+	clear_bit(__LINK_STATE_RX_SCHED, &netdev->state);
+}
+#endif
+#endif /* NAPI */
+#ifndef netif_tx_disable
+#define netif_tx_disable(x) _kc_netif_tx_disable(x)
+static inline void _kc_netif_tx_disable(struct net_device *dev)
+{
+	spin_lock_bh(&dev->xmit_lock);
+	netif_stop_queue(dev);
+	spin_unlock_bh(&dev->xmit_lock);
+}
+#endif
+#else /* 2.4.23 => 2.4.22 */
+#define HAVE_SCTP
+#endif /* 2.4.23 => 2.4.22 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \
+    ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+      LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) )
+#define ETHTOOL_OPS_COMPAT
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+/* 2.5.71 => 2.4.x */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) )
+#define sk_protocol protocol
+#define pci_get_device pci_find_device
+#endif /* 2.5.70 => 2.4.x */
+
+/*****************************************************************************/
+/* < 2.4.27 or 2.6.0 <= 2.6.5 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \
+    ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \
+      LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) )
+
+#ifndef netif_msg_init
+#define netif_msg_init _kc_netif_msg_init
+static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits)
+{
+	/* use default */
+	if (debug_value < 0 || debug_value >= (sizeof(u32) * 8))
+		return default_msg_enable_bits;
+	if (debug_value == 0) /* no output */
+		return 0;
+	/* set low N bits */
+	return (1 << debug_value) -1;
+}
+#endif
+
+#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */
+/*****************************************************************************/
+#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \
+     (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \
+      ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) )))
+#define netdev_priv(x) x->priv
+#endif
+
+/*****************************************************************************/
+/* <= 2.5.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) )
+#include <linux/rtnetlink.h>
+#undef pci_register_driver
+#define pci_register_driver pci_module_init
+
+/*
+ * Most of the dma compat code is copied/modifed from the 2.4.37
+ * /include/linux/libata-compat.h header file
+ */
+/* These definitions mirror those in pci.h, so they can be used
+ * interchangeably with their PCI_ counterparts */
+enum dma_data_direction {
+	DMA_BIDIRECTIONAL = 0,
+	DMA_TO_DEVICE = 1,
+	DMA_FROM_DEVICE = 2,
+	DMA_NONE = 3,
+};
+
+struct device {
+	struct pci_dev pdev;
+};
+
+static inline struct pci_dev *to_pci_dev (struct device *dev)
+{
+	return (struct pci_dev *) dev;
+}
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+	return (struct device *) pdev;
+}
+
+#define pdev_printk(lvl, pdev, fmt, args...)	\
+	printk("%s %s: " fmt, lvl, pci_name(pdev), ## args)
+#define dev_err(dev, fmt, args...)            \
+	pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args)
+#define dev_info(dev, fmt, args...)            \
+	pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args)
+#define dev_warn(dev, fmt, args...)            \
+	pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args)
+
+/* NOTE: dangerous! we ignore the 'gfp' argument */
+#define dma_alloc_coherent(dev,sz,dma,gfp) \
+	pci_alloc_consistent(to_pci_dev(dev),(sz),(dma))
+#define dma_free_coherent(dev,sz,addr,dma_addr) \
+	pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr))
+
+#define dma_map_page(dev,a,b,c,d) \
+	pci_map_page(to_pci_dev(dev),(a),(b),(c),(d))
+#define dma_unmap_page(dev,a,b,c) \
+	pci_unmap_page(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_map_single(dev,a,b,c) \
+	pci_map_single(to_pci_dev(dev),(a),(b),(c))
+#define dma_unmap_single(dev,a,b,c) \
+	pci_unmap_single(to_pci_dev(dev),(a),(b),(c))
+
+#define dma_sync_single(dev,a,b,c) \
+	pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c))
+
+/* for range just sync everything, that's all the pci API can do */
+#define dma_sync_single_range(dev,addr,off,sz,dir) \
+	pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir))
+
+#define dma_set_mask(dev,mask) \
+	pci_set_dma_mask(to_pci_dev(dev),(mask))
+
+/* hlist_* code - double linked lists */
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+	next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = NULL;
+	n->pprev = NULL;
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each_entry(tpos, pos, head, member)                    \
+	for (pos = (head)->first;                                        \
+	     pos && ({ prefetch(pos->next); 1;}) &&                      \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member)            \
+	for (pos = (head)->first;                                        \
+	     pos && ({ n = pos->next; 1; }) &&                           \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#ifndef might_sleep
+#define might_sleep()
+#endif
+#else
+static inline struct device *pci_dev_to_dev(struct pci_dev *pdev)
+{
+	return &pdev->dev;
+}
+#endif /* <= 2.5.0 */
+
+/*****************************************************************************/
+/* 2.5.28 => 2.4.23 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) )
+
+static inline void _kc_synchronize_irq(void)
+{
+	synchronize_irq();
+}
+#undef synchronize_irq
+#define synchronize_irq(X) _kc_synchronize_irq()
+
+#include <linux/tqueue.h>
+#define work_struct tq_struct
+#undef INIT_WORK
+#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a)
+#undef container_of
+#define container_of list_entry
+#define schedule_work schedule_task
+#define flush_scheduled_work flush_scheduled_tasks
+#define cancel_work_sync(x) flush_scheduled_work()
+
+#endif /* 2.5.28 => 2.4.17 */
+
+/*****************************************************************************/
+/* 2.6.0 => 2.5.28 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#undef get_cpu
+#define get_cpu() smp_processor_id()
+#undef put_cpu
+#define put_cpu() do { } while(0)
+#define MODULE_INFO(version, _version)
+#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT
+#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1
+#endif
+#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1
+
+#define dma_set_coherent_mask(dev,mask) 1
+
+#undef dev_put
+#define dev_put(dev) __dev_put(dev)
+
+#ifndef skb_fill_page_desc
+#define skb_fill_page_desc _kc_skb_fill_page_desc
+extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size);
+#endif
+
+#undef ALIGN
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+
+#ifndef page_count
+#define page_count(p) atomic_read(&(p)->count)
+#endif
+
+#ifdef MAX_NUMNODES
+#undef MAX_NUMNODES
+#endif
+#define MAX_NUMNODES 1
+
+/* find_first_bit and find_next bit are not defined for most
+ * 2.4 kernels (except for the redhat 2.4.21 kernels
+ */
+#include <linux/bitops.h>
+#define BITOP_WORD(nr)          ((nr) / BITS_PER_LONG)
+#undef find_next_bit
+#define find_next_bit _kc_find_next_bit
+extern unsigned long _kc_find_next_bit(const unsigned long *addr,
+                                       unsigned long size,
+                                       unsigned long offset);
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+
+
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+	if (strchr(dev->name, '%'))
+		return "(unregistered net_device)";
+	return dev->name;
+}
+#define netdev_name(netdev)	_kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#ifndef strlcpy
+#define strlcpy _kc_strlcpy
+extern size_t _kc_strlcpy(char *dest, const char *src, size_t size);
+#endif /* strlcpy */
+
+#endif /* 2.6.0 => 2.5.28 */
+
+/*****************************************************************************/
+/* 2.6.4 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#endif /* 2.6.4 => 2.6.0 */
+
+/*****************************************************************************/
+/* 2.6.5 => 2.6.0 */
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) )
+#define dma_sync_single_for_cpu		dma_sync_single
+#define dma_sync_single_for_device	dma_sync_single
+#define dma_sync_single_range_for_cpu		dma_sync_single_range
+#define dma_sync_single_range_for_device	dma_sync_single_range
+#ifndef pci_dma_mapping_error
+#define pci_dma_mapping_error _kc_pci_dma_mapping_error
+static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr)
+{
+	return dma_addr == 0;
+}
+#endif
+#endif /* 2.6.5 => 2.6.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
+extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...);
+#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args)
+#endif /* < 2.6.4 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) )
+/* taken from 2.6 include/linux/bitmap.h */
+#undef bitmap_zero
+#define bitmap_zero _kc_bitmap_zero
+static inline void _kc_bitmap_zero(unsigned long *dst, int nbits)
+{
+        if (nbits <= BITS_PER_LONG)
+                *dst = 0UL;
+        else {
+                int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+                memset(dst, 0, len);
+        }
+}
+#define random_ether_addr _kc_random_ether_addr
+static inline void _kc_random_ether_addr(u8 *addr)
+{
+        get_random_bytes(addr, ETH_ALEN);
+        addr[0] &= 0xfe; /* clear multicast */
+        addr[0] |= 0x02; /* set local assignment */
+}
+#define page_to_nid(x) 0
+
+#endif /* < 2.6.6 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) )
+#undef if_mii
+#define if_mii _kc_if_mii
+static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq)
+{
+	return (struct mii_ioctl_data *) &rq->ifr_ifru;
+}
+
+#ifndef __force
+#define __force
+#endif
+#endif /* < 2.6.7 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) )
+#ifndef PCI_EXP_DEVCTL
+#define PCI_EXP_DEVCTL 8
+#endif
+#ifndef PCI_EXP_DEVCTL_CERE
+#define PCI_EXP_DEVCTL_CERE 0x0001
+#endif
+#define msleep(x)	do { set_current_state(TASK_UNINTERRUPTIBLE); \
+				schedule_timeout((x * HZ)/1000 + 2); \
+			} while (0)
+
+#endif /* < 2.6.8 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
+#include <net/dsfield.h>
+#define __iomem
+
+#ifndef kcalloc
+#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags)
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+#define MSEC_PER_SEC    1000L
+static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+	return (MSEC_PER_SEC / HZ) * j;
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+	return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
+#else
+	return (j * MSEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m)
+{
+	if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET))
+		return MAX_JIFFY_OFFSET;
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+	return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+	return m * (HZ / MSEC_PER_SEC);
+#else
+	return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
+#endif
+}
+
+#define msleep_interruptible _kc_msleep_interruptible
+static inline unsigned long _kc_msleep_interruptible(unsigned int msecs)
+{
+	unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1;
+
+	while (timeout && !signal_pending(current)) {
+		__set_current_state(TASK_INTERRUPTIBLE);
+		timeout = schedule_timeout(timeout);
+	}
+	return _kc_jiffies_to_msecs(timeout);
+}
+
+/* Basic mode control register. */
+#define BMCR_SPEED1000		0x0040  /* MSB of Speed (1000)         */
+
+#ifndef __le16
+#define __le16 u16
+#endif
+#ifndef __le32
+#define __le32 u32
+#endif
+#ifndef __le64
+#define __le64 u64
+#endif
+#ifndef __be16
+#define __be16 u16
+#endif
+#ifndef __be32
+#define __be32 u32
+#endif
+#ifndef __be64
+#define __be64 u64
+#endif
+
+static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
+{
+	return (struct vlan_ethhdr *)skb->mac.raw;
+}
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY		(1 << 0)
+#define WAKE_UCAST		(1 << 1)
+#define WAKE_MCAST		(1 << 2)
+#define WAKE_BCAST		(1 << 3)
+#define WAKE_ARP		(1 << 4)
+#define WAKE_MAGIC		(1 << 5)
+#define WAKE_MAGICSECURE	(1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#define skb_header_pointer _kc_skb_header_pointer
+static inline void *_kc_skb_header_pointer(const struct sk_buff *skb,
+					    int offset, int len, void *buffer)
+{
+	int hlen = skb_headlen(skb);
+
+	if (hlen - offset >= len)
+		return skb->data + offset;
+
+#ifdef MAX_SKB_FRAGS
+	if (skb_copy_bits(skb, offset, buffer, len) < 0)
+		return NULL;
+
+	return buffer;
+#else
+	return NULL;
+#endif
+
+#ifndef NETDEV_TX_OK
+#define NETDEV_TX_OK 0
+#endif
+#ifndef NETDEV_TX_BUSY
+#define NETDEV_TX_BUSY 1
+#endif
+#ifndef NETDEV_TX_LOCKED
+#define NETDEV_TX_LOCKED -1
+#endif
+}
+
+#ifndef __bitwise
+#define __bitwise
+#endif
+#endif /* < 2.6.9 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
+#ifdef module_param_array_named
+#undef module_param_array_named
+#define module_param_array_named(name, array, type, nump, perm)          \
+	static struct kparam_array __param_arr_##name                    \
+	= { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \
+	    sizeof(array[0]), array };                                   \
+	module_param_call(name, param_array_set, param_array_get,        \
+			  &__param_arr_##name, perm)
+#endif /* module_param_array_named */
+/*
+ * num_online is broken for all < 2.6.10 kernels.  This is needed to support
+ * Node module parameter of ixgbe.
+ */
+#undef num_online_nodes
+#define num_online_nodes(n) 1
+extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES);
+#undef node_online_map
+#define node_online_map _kcompat_node_online_map
+#endif /* < 2.6.10 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) )
+#define PCI_D0      0
+#define PCI_D1      1
+#define PCI_D2      2
+#define PCI_D3hot   3
+#define PCI_D3cold  4
+typedef int pci_power_t;
+#define pci_choose_state(pdev,state) state
+#define PMSG_SUSPEND 3
+#define PCI_EXP_LNKCTL	16
+
+#undef NETIF_F_LLTX
+
+#ifndef ARCH_HAS_PREFETCH
+#define prefetch(X)
+#endif
+
+#ifndef NET_IP_ALIGN
+#define NET_IP_ALIGN 2
+#endif
+
+#define KC_USEC_PER_SEC	1000000L
+#define usecs_to_jiffies _kc_usecs_to_jiffies
+static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j)
+{
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+	return (KC_USEC_PER_SEC / HZ) * j;
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+	return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC);
+#else
+	return (j * KC_USEC_PER_SEC) / HZ;
+#endif
+}
+static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m)
+{
+	if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET))
+		return MAX_JIFFY_OFFSET;
+#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ)
+	return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ);
+#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC)
+	return m * (HZ / KC_USEC_PER_SEC);
+#else
+	return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC;
+#endif
+}
+#endif /* < 2.6.11 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) )
+#include <linux/reboot.h>
+#define USE_REBOOT_NOTIFIER
+
+/* Generic MII registers. */
+#define MII_CTRL1000        0x09        /* 1000BASE-T control          */
+#define MII_STAT1000        0x0a        /* 1000BASE-T status           */
+/* Advertisement control register. */
+#define ADVERTISE_PAUSE_CAP     0x0400  /* Try for pause               */
+#define ADVERTISE_PAUSE_ASYM    0x0800  /* Try for asymmetric pause     */
+/* 1000BASE-T Control register */
+#define ADVERTISE_1000FULL      0x0200  /* Advertise 1000BASE-T full duplex */
+#ifndef is_zero_ether_addr
+#define is_zero_ether_addr _kc_is_zero_ether_addr
+static inline int _kc_is_zero_ether_addr(const u8 *addr)
+{
+	return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
+}
+#endif /* is_zero_ether_addr */
+#ifndef is_multicast_ether_addr
+#define is_multicast_ether_addr _kc_is_multicast_ether_addr
+static inline int _kc_is_multicast_ether_addr(const u8 *addr)
+{
+	return addr[0] & 0x01;
+}
+#endif /* is_multicast_ether_addr */
+#endif /* < 2.6.12 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
+#ifndef kstrdup
+#define kstrdup _kc_kstrdup
+extern char *_kc_kstrdup(const char *s, unsigned int gfp);
+#endif
+#endif /* < 2.6.13 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
+#define pm_message_t u32
+#ifndef kzalloc
+#define kzalloc _kc_kzalloc
+extern void *_kc_kzalloc(size_t size, int flags);
+#endif
+
+/* Generic MII registers. */
+#define MII_ESTATUS	    0x0f	/* Extended Status */
+/* Basic mode status register. */
+#define BMSR_ESTATEN		0x0100	/* Extended Status in R15 */
+/* Extended status register. */
+#define ESTATUS_1000_TFULL	0x2000	/* Can do 1000BT Full */
+#define ESTATUS_1000_THALF	0x1000	/* Can do 1000BT Half */
+
+#define ADVERTISED_Pause	(1 << 13)
+#define ADVERTISED_Asym_Pause	(1 << 14)
+
+#if (!(RHEL_RELEASE_CODE && \
+       (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \
+       (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))))
+#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t))
+#define gfp_t unsigned
+#else
+typedef unsigned gfp_t;
+#endif
+#endif /* !RHEL4.3->RHEL5.0 */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) )
+#ifdef CONFIG_X86_64
+#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir)       \
+	dma_sync_single_for_cpu(dev, dma_handle, size, dir)
+#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir)    \
+	dma_sync_single_for_device(dev, dma_handle, size, dir)
+#endif
+#endif
+#endif /* < 2.6.14 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) )
+#ifndef vmalloc_node
+#define vmalloc_node(a,b) vmalloc(a)
+#endif /* vmalloc_node*/
+
+#define setup_timer(_timer, _function, _data) \
+do { \
+	(_timer)->function = _function; \
+	(_timer)->data = _data; \
+	init_timer(_timer); \
+} while (0)
+#ifndef device_can_wakeup
+#define device_can_wakeup(dev)	(1)
+#endif
+#ifndef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val)	do{}while(0)
+#endif
+#ifndef device_init_wakeup
+#define device_init_wakeup(dev,val) do {} while (0)
+#endif
+static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2)
+{
+	const u16 *a = (const u16 *) addr1;
+	const u16 *b = (const u16 *) addr2;
+
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+}
+#undef compare_ether_addr
+#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2)
+#endif /* < 2.6.15 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) )
+#undef DEFINE_MUTEX
+#define DEFINE_MUTEX(x)	DECLARE_MUTEX(x)
+#define mutex_lock(x)	down_interruptible(x)
+#define mutex_unlock(x)	up(x)
+
+#ifndef ____cacheline_internodealigned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp
+#else
+#define ____cacheline_internodealigned_in_smp
+#endif /* CONFIG_SMP */
+#endif /* ____cacheline_internodealigned_in_smp */
+#undef HAVE_PCI_ERS
+#else /* 2.6.16 and above */
+#undef HAVE_PCI_ERS
+#define HAVE_PCI_ERS
+#endif /* < 2.6.16 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) )
+#ifndef first_online_node
+#define first_online_node 0
+#endif
+#ifndef NET_SKB_PAD
+#define NET_SKB_PAD 16
+#endif
+#endif /* < 2.6.17 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
+
+#ifndef IRQ_HANDLED
+#define irqreturn_t void
+#define IRQ_HANDLED
+#define IRQ_NONE
+#endif
+
+#ifndef IRQF_PROBE_SHARED
+#ifdef SA_PROBEIRQ
+#define IRQF_PROBE_SHARED SA_PROBEIRQ
+#else
+#define IRQF_PROBE_SHARED 0
+#endif
+#endif
+
+#ifndef IRQF_SHARED
+#define IRQF_SHARED SA_SHIRQ
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef FIELD_SIZEOF
+#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#endif
+
+#ifndef skb_is_gso
+#ifdef NETIF_F_TSO
+#define skb_is_gso _kc_skb_is_gso
+static inline int _kc_skb_is_gso(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_size;
+}
+#else
+#define skb_is_gso(a) 0
+#endif
+#endif
+
+#ifndef resource_size_t
+#define resource_size_t unsigned long
+#endif
+
+#ifdef skb_pad
+#undef skb_pad
+#endif
+#define skb_pad(x,y) _kc_skb_pad(x, y)
+int _kc_skb_pad(struct sk_buff *skb, int pad);
+#ifdef skb_padto
+#undef skb_padto
+#endif
+#define skb_padto(x,y) _kc_skb_padto(x, y)
+static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int size = skb->len;
+	if(likely(size >= len))
+		return 0;
+	return _kc_skb_pad(skb, len - size);
+}
+
+#ifndef DECLARE_PCI_UNMAP_ADDR
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
+	dma_addr_t ADDR_NAME
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
+	u32 LEN_NAME
+#define pci_unmap_addr(PTR, ADDR_NAME) \
+	((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+	(((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME) \
+	((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+	(((PTR)->LEN_NAME) = (VAL))
+#endif /* DECLARE_PCI_UNMAP_ADDR */
+#endif /* < 2.6.18 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) )
+#if (!((RHEL_RELEASE_CODE && \
+        ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \
+          RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \
+         (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))) || \
+       (AX_RELEASE_CODE && AX_RELEASE_CODE > AX_RELEASE_VERSION(3,0))))
+typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *);
+#endif
+#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
+#undef CONFIG_INET_LRO
+#undef CONFIG_INET_LRO_MODULE
+#undef CONFIG_FCOE
+#undef CONFIG_FCOE_MODULE
+#endif
+typedef irqreturn_t (*new_handler_t)(int, void*);
+static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#else /* 2.4.x */
+typedef void (*irq_handler_t)(int, void*, struct pt_regs *);
+typedef void (*new_handler_t)(int, void*);
+static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id)
+#endif /* >= 2.5.x */
+{
+	irq_handler_t new_handler = (irq_handler_t) handler;
+	return request_irq(irq, new_handler, flags, devname, dev_id);
+}
+
+#undef request_irq
+#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id))
+
+#define irq_handler_t new_handler_t
+/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
+#define PCIE_CONFIG_SPACE_LEN 256
+#define PCI_CONFIG_SPACE_LEN 64
+#define PCIE_LINK_STATUS 0x12
+#define pci_config_space_ich8lan() do {} while(0)
+#undef pci_save_state
+extern int _kc_pci_save_state(struct pci_dev *);
+#define pci_save_state(pdev) _kc_pci_save_state(pdev)
+#undef pci_restore_state
+extern void _kc_pci_restore_state(struct pci_dev *);
+#define pci_restore_state(pdev) _kc_pci_restore_state(pdev)
+#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
+
+#ifdef HAVE_PCI_ERS
+#undef free_netdev
+extern void _kc_free_netdev(struct net_device *);
+#define free_netdev(netdev) _kc_free_netdev(netdev)
+#endif
+static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+{
+	return 0;
+}
+#define pci_disable_pcie_error_reporting(dev) do {} while (0)
+#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0)
+
+extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp);
+#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp)
+#ifndef bool
+#define bool _Bool
+#define true 1
+#define false 0
+#endif
+#else /* 2.6.19 */
+#include <linux/aer.h>
+#include <linux/string.h>
+#endif /* < 2.6.19 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) )
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) )
+#undef INIT_WORK
+#define INIT_WORK(_work, _func) \
+do { \
+	INIT_LIST_HEAD(&(_work)->entry); \
+	(_work)->pending = 0; \
+	(_work)->func = (void (*)(void *))_func; \
+	(_work)->data = _work; \
+	init_timer(&(_work)->timer); \
+} while (0)
+#endif
+
+#ifndef PCI_VDEVICE
+#define PCI_VDEVICE(ven, dev)        \
+	PCI_VENDOR_ID_##ven, (dev),  \
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0
+#endif
+
+#ifndef round_jiffies
+#define round_jiffies(x) x
+#endif
+
+#define csum_offset csum
+
+#define HAVE_EARLY_VMALLOC_NODE
+#define dev_to_node(dev) -1
+#undef set_dev_node
+/* remove compiler warning with b=b, for unused variable */
+#define set_dev_node(a, b) do { (b) = (b); } while(0)
+
+#if (!(RHEL_RELEASE_CODE && \
+       (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+         (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+        (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \
+     !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+#endif
+
+#if (!(RHEL_RELEASE_CODE && \
+       (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \
+         (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \
+        (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \
+     !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0)))
+static inline __wsum csum_unfold(__sum16 n)
+{
+	return (__force __wsum)n;
+}
+#endif
+
+#else /* < 2.6.20 */
+#define HAVE_DEVICE_NUMA_NODE
+#endif /* < 2.6.20 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define to_net_dev(class) container_of(class, struct net_device, class_dev)
+#define NETDEV_CLASS_DEV
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)))
+#define vlan_group_get_device(vg, id) (vg->vlan_devices[id])
+#define vlan_group_set_device(vg, id, dev)		\
+	do {						\
+		if (vg) vg->vlan_devices[id] = dev;	\
+	} while (0)
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */
+#define pci_channel_offline(pdev) (pdev->error_state && \
+	pdev->error_state != pci_channel_io_normal)
+#define pci_request_selected_regions(pdev, bars, name) \
+        pci_request_regions(pdev, name)
+#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev);
+#endif /* < 2.6.21 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define tcp_hdr(skb) (skb->h.th)
+#define tcp_hdrlen(skb) (skb->h.th->doff << 2)
+#define skb_transport_offset(skb) (skb->h.raw - skb->data)
+#define skb_transport_header(skb) (skb->h.raw)
+#define ipv6_hdr(skb) (skb->nh.ipv6h)
+#define ip_hdr(skb) (skb->nh.iph)
+#define skb_network_offset(skb) (skb->nh.raw - skb->data)
+#define skb_network_header(skb) (skb->nh.raw)
+#define skb_tail_pointer(skb) skb->tail
+#define skb_reset_tail_pointer(skb) \
+	do { \
+		skb->tail = skb->data; \
+	} while (0)
+#define skb_copy_to_linear_data(skb, from, len) \
+				memcpy(skb->data, from, len)
+#define skb_copy_to_linear_data_offset(skb, offset, from, len) \
+				memcpy(skb->data + offset, from, len)
+#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw)
+#define pci_register_driver pci_module_init
+#define skb_mac_header(skb) skb->mac.raw
+
+#ifdef NETIF_F_MULTI_QUEUE
+#ifndef alloc_etherdev_mq
+#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a)
+#endif
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef ETH_FCS_LEN
+#define ETH_FCS_LEN 4
+#endif
+#define cancel_work_sync(x) flush_scheduled_work()
+#ifndef udp_hdr
+#define udp_hdr _udp_hdr
+static inline struct udphdr *_udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb_transport_header(skb);
+}
+#endif
+
+#ifdef cpu_to_be16
+#undef cpu_to_be16
+#endif
+#define cpu_to_be16(x) __constant_htons(x)
+
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)))
+enum {
+	DUMP_PREFIX_NONE,
+	DUMP_PREFIX_ADDRESS,
+	DUMP_PREFIX_OFFSET
+};
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */
+#ifndef hex_asc
+#define hex_asc(x)	"0123456789abcdef"[x]
+#endif
+#include <linux/ctype.h>
+extern void _kc_print_hex_dump(const char *level, const char *prefix_str,
+			       int prefix_type, int rowsize, int groupsize,
+			       const void *buf, size_t len, bool ascii);
+#define print_hex_dump(lvl, s, t, r, g, b, l, a) \
+		_kc_print_hex_dump(lvl, s, t, r, g, b, l, a)
+#else /* 2.6.22 */
+#define ETH_TYPE_TRANS_SETS_DEV
+#define HAVE_NETDEV_STATS_IN_NETDEV
+#endif /* < 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) )
+#endif /* > 2.6.22 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
+#define netif_subqueue_stopped(_a, _b) 0
+#ifndef PTR_ALIGN
+#define PTR_ALIGN(p, a)         ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#endif
+
+#ifndef CONFIG_PM_SLEEP
+#define CONFIG_PM_SLEEP	CONFIG_PM
+#endif
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) )
+#define HAVE_ETHTOOL_GET_PERM_ADDR
+#endif /* 2.6.14 through 2.6.22 */
+#endif /* < 2.6.23 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
+#ifndef ETH_FLAG_LRO
+#define ETH_FLAG_LRO NETIF_F_LRO
+#endif
+
+/* if GRO is supported then the napi struct must already exist */
+#ifndef NETIF_F_GRO
+/* NAPI API changes in 2.6.24 break everything */
+struct napi_struct {
+	/* used to look up the real NAPI polling routine */
+	int (*poll)(struct napi_struct *, int);
+	struct net_device *dev;
+	int weight;
+};
+#endif
+
+#ifdef NAPI
+extern int __kc_adapter_clean(struct net_device *, int *);
+extern struct net_device *napi_to_poll_dev(struct napi_struct *napi);
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+	do { \
+		struct napi_struct *__napi = (_napi); \
+		struct net_device *poll_dev = napi_to_poll_dev(__napi); \
+		poll_dev->poll = &(__kc_adapter_clean); \
+		poll_dev->priv = (_napi); \
+		poll_dev->weight = (_weight); \
+		set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \
+		set_bit(__LINK_STATE_START, &poll_dev->state);\
+		dev_hold(poll_dev); \
+		__napi->poll = &(_poll); \
+		__napi->weight = (_weight); \
+		__napi->dev = (_netdev); \
+	} while (0)
+#define netif_napi_del(_napi) \
+	do { \
+		struct net_device *poll_dev = napi_to_poll_dev(_napi); \
+		WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \
+		dev_put(poll_dev); \
+		memset(poll_dev, 0, sizeof(struct net_device));\
+	} while (0)
+#define napi_schedule_prep(_napi) \
+	(netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi)))
+#define napi_schedule(_napi) \
+	do { \
+		if (napi_schedule_prep(_napi)) \
+			__netif_rx_schedule(napi_to_poll_dev(_napi)); \
+	} while (0)
+#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi))
+#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi))
+#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi))
+#ifndef NETIF_F_GRO
+#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi))
+#else
+#define napi_complete(_napi) \
+	do { \
+		napi_gro_flush(_napi); \
+		netif_rx_complete(napi_to_poll_dev(_napi)); \
+	} while (0)
+#endif /* NETIF_F_GRO */
+#else /* NAPI */
+#define netif_napi_add(_netdev, _napi, _poll, _weight) \
+	do { \
+		struct napi_struct *__napi = _napi; \
+		_netdev->poll = &(_poll); \
+		_netdev->weight = (_weight); \
+		__napi->poll = &(_poll); \
+		__napi->weight = (_weight); \
+		__napi->dev = (_netdev); \
+	} while (0)
+#define netif_napi_del(_a) do {} while (0)
+#endif /* NAPI */
+
+#undef dev_get_by_name
+#define dev_get_by_name(_a, _b) dev_get_by_name(_b)
+#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b)
+#ifndef DMA_BIT_MASK
+#define DMA_BIT_MASK(n)	(((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1))
+#endif
+
+#ifdef NETIF_F_TSO6
+#define skb_is_gso_v6 _kc_skb_is_gso_v6
+static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
+}
+#endif /* NETIF_F_TSO6 */
+
+#ifndef KERN_CONT
+#define KERN_CONT	""
+#endif
+#else /* < 2.6.24 */
+#define HAVE_ETHTOOL_GET_SSET_COUNT
+#define HAVE_NETDEV_NAPI_LIST
+#endif /* < 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#include <linux/pm_qos_params.h>
+#else /* >= 3.2.0 */
+#include <linux/pm_qos.h>
+#endif /* else >= 3.2.0 */
+#endif /* > 2.6.24 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) )
+#define PM_QOS_CPU_DMA_LATENCY	1
+
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) )
+#include <linux/latency.h>
+#define PM_QOS_DEFAULT_VALUE	INFINITE_LATENCY
+#define pm_qos_add_requirement(pm_qos_class, name, value) \
+		set_acceptable_latency(name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name) \
+		remove_acceptable_latency(name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) \
+		modify_acceptable_latency(name, value)
+#else
+#define PM_QOS_DEFAULT_VALUE	-1
+#define pm_qos_add_requirement(pm_qos_class, name, value)
+#define pm_qos_remove_requirement(pm_qos_class, name)
+#define pm_qos_update_requirement(pm_qos_class, name, value) { \
+	if (value != PM_QOS_DEFAULT_VALUE) { \
+		printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \
+			pci_name(adapter->pdev)); \
+	} \
+}
+
+#endif /* > 2.6.18 */
+
+#define pci_enable_device_mem(pdev) pci_enable_device(pdev)
+
+#ifndef DEFINE_PCI_DEVICE_TABLE
+#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[]
+#endif /* DEFINE_PCI_DEVICE_TABLE */
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
+#ifndef IXGBE_PROCFS
+#define IXGBE_PROCFS
+#endif /* IXGBE_PROCFS */
+#endif /* >= 2.6.0 */
+
+
+#else /* < 2.6.25 */
+
+#ifndef IXGBE_SYSFS
+#define IXGBE_SYSFS
+#endif /* IXGBE_SYSFS */
+
+
+#endif /* < 2.6.25 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
+#ifndef clamp_t
+#define clamp_t(type, val, min, max) ({		\
+	type __val = (val);			\
+	type __min = (min);			\
+	type __max = (max);			\
+	__val = __val < __min ? __min : __val;	\
+	__val > __max ? __max : __val; })
+#endif /* clamp_t */
+#ifdef NETIF_F_TSO
+#ifdef NETIF_F_TSO6
+#define netif_set_gso_max_size(_netdev, size) \
+	do { \
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { \
+			_netdev->features &= ~NETIF_F_TSO; \
+			_netdev->features &= ~NETIF_F_TSO6; \
+		} else { \
+			_netdev->features |= NETIF_F_TSO; \
+			_netdev->features |= NETIF_F_TSO6; \
+		} \
+	} while (0)
+#else /* NETIF_F_TSO6 */
+#define netif_set_gso_max_size(_netdev, size) \
+	do { \
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+			_netdev->features &= ~NETIF_F_TSO; \
+		else \
+			_netdev->features |= NETIF_F_TSO; \
+	} while (0)
+#endif /* NETIF_F_TSO6 */
+#else
+#define netif_set_gso_max_size(_netdev, size) do {} while (0)
+#endif /* NETIF_F_TSO */
+#undef kzalloc_node
+#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags)
+
+extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state);
+#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s)
+#else /* < 2.6.26 */
+#include <linux/pci-aspm.h>
+#define HAVE_NETDEV_VLAN_FEATURES
+#endif /* < 2.6.26 */
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
+static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep,
+					     __u32 speed)
+{
+	ep->speed = (__u16)speed;
+	/* ep->speed_hi = (__u16)(speed >> 16); */
+}
+#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set
+
+static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep)
+{
+	/* no speed_hi before 2.6.27, and probably no need for it yet */
+	return (__u32)ep->speed;
+}
+#define ethtool_cmd_speed _kc_ethtool_cmd_speed
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) )
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM))
+#define ANCIENT_PM 1
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \
+       defined(CONFIG_PM_SLEEP))
+#define NEWER_PM 1
+#endif
+#if defined(ANCIENT_PM) || defined(NEWER_PM)
+#undef device_set_wakeup_enable
+#define device_set_wakeup_enable(dev, val) \
+	do { \
+		u16 pmc = 0; \
+		int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \
+		if (pm) { \
+			pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \
+				&pmc); \
+		} \
+		(dev)->power.can_wakeup = !!(pmc >> 11); \
+		(dev)->power.should_wakeup = (val && (pmc >> 11)); \
+	} while (0)
+#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */
+#endif /* 2.6.15 through 2.6.27 */
+#ifndef netif_napi_del
+#define netif_napi_del(_a) do {} while (0)
+#ifdef NAPI
+#ifdef CONFIG_NETPOLL
+#undef netif_napi_del
+#define netif_napi_del(_a) list_del(&(_a)->dev_list);
+#endif
+#endif
+#endif /* netif_napi_del */
+#ifdef dma_mapping_error
+#undef dma_mapping_error
+#endif
+#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr)
+
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#define HAVE_TX_MQ
+#endif
+
+#ifdef HAVE_TX_MQ
+extern void _kc_netif_tx_stop_all_queues(struct net_device *);
+extern void _kc_netif_tx_wake_all_queues(struct net_device *);
+extern void _kc_netif_tx_start_all_queues(struct net_device *);
+#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a)
+#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a)
+#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a)
+#undef netif_stop_subqueue
+#define netif_stop_subqueue(_ndev,_qi) do { \
+	if (netif_is_multiqueue((_ndev))) \
+		netif_stop_subqueue((_ndev), (_qi)); \
+	else \
+		netif_stop_queue((_ndev)); \
+	} while (0)
+#undef netif_start_subqueue
+#define netif_start_subqueue(_ndev,_qi) do { \
+	if (netif_is_multiqueue((_ndev))) \
+		netif_start_subqueue((_ndev), (_qi)); \
+	else \
+		netif_start_queue((_ndev)); \
+	} while (0)
+#else /* HAVE_TX_MQ */
+#define netif_tx_stop_all_queues(a) netif_stop_queue(a)
+#define netif_tx_wake_all_queues(a) netif_wake_queue(a)
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) )
+#define netif_tx_start_all_queues(a) netif_start_queue(a)
+#else
+#define netif_tx_start_all_queues(a) do {} while (0)
+#endif
+#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev))
+#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev))
+#endif /* HAVE_TX_MQ */
+#ifndef NETIF_F_MULTI_QUEUE
+#define NETIF_F_MULTI_QUEUE 0
+#define netif_is_multiqueue(a) 0
+#define netif_wake_subqueue(a, b)
+#endif /* NETIF_F_MULTI_QUEUE */
+
+#ifndef __WARN_printf
+extern void __kc_warn_slowpath(const char *file, const int line,
+		const char *fmt, ...) __attribute__((format(printf, 3, 4)));
+#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg)
+#endif /* __WARN_printf */
+
+#ifndef WARN
+#define WARN(condition, format...) ({						\
+	int __ret_warn_on = !!(condition);				\
+	if (unlikely(__ret_warn_on))					\
+		__WARN_printf(format);					\
+	unlikely(__ret_warn_on);					\
+})
+#endif /* WARN */
+#else /* < 2.6.27 */
+#define HAVE_TX_MQ
+#define HAVE_NETDEV_SELECT_QUEUE
+#endif /* < 2.6.27 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
+#define pci_ioremap_bar(pdev, bar)	ioremap(pci_resource_start(pdev, bar), \
+					        pci_resource_len(pdev, bar))
+#define pci_wake_from_d3 _kc_pci_wake_from_d3
+#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep
+extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable);
+extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev);
+#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC)
+#ifndef __skb_queue_head_init
+static inline void __kc_skb_queue_head_init(struct sk_buff_head *list)
+{
+	list->prev = list->next = (struct sk_buff *)list;
+	list->qlen = 0;
+}
+#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q)
+#endif
+#endif /* < 2.6.28 */
+
+#ifndef skb_add_rx_frag
+#define skb_add_rx_frag _kc_skb_add_rx_frag
+extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, int, int);
+#endif
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
+#ifndef swap
+#define swap(a, b) \
+	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+#endif
+#define pci_request_selected_regions_exclusive(pdev, bars, name) \
+		pci_request_selected_regions(pdev, bars, name)
+#ifndef CONFIG_NR_CPUS
+#define CONFIG_NR_CPUS 1
+#endif /* CONFIG_NR_CPUS */
+#ifndef pcie_aspm_enabled
+#define pcie_aspm_enabled()   (1)
+#endif /* pcie_aspm_enabled */
+#else /* < 2.6.29 */
+#ifndef HAVE_NET_DEVICE_OPS
+#define HAVE_NET_DEVICE_OPS
+#endif
+#ifdef CONFIG_DCB
+#define HAVE_PFC_MODE_ENABLE
+#endif /* CONFIG_DCB */
+#endif /* < 2.6.29 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
+#define skb_rx_queue_recorded(a) false
+#define skb_get_rx_queue(a) 0
+#undef CONFIG_FCOE
+#undef CONFIG_FCOE_MODULE
+extern u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb);
+#define skb_tx_hash(n, s) _kc_skb_tx_hash(n, s)
+#define skb_record_rx_queue(a, b) do {} while (0)
+#ifndef CONFIG_PCI_IOV
+#undef pci_enable_sriov
+#define pci_enable_sriov(a, b) -ENOTSUPP
+#undef pci_disable_sriov
+#define pci_disable_sriov(a) do {} while (0)
+#endif /* CONFIG_PCI_IOV */
+#ifndef pr_cont
+#define pr_cont(fmt, ...) \
+	printk(KERN_CONT fmt, ##__VA_ARGS__)
+#endif /* pr_cont */
+#else
+#define HAVE_ASPM_QUIRKS
+#endif /* < 2.6.30 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) )
+#define ETH_P_1588 0x88F7
+#define ETH_P_FIP  0x8914
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc_count)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(uclist, dev) \
+	for (uclist = dev->uc_list; uclist; uclist = uclist->next)
+#endif
+#else
+#ifndef HAVE_NETDEV_STORAGE_ADDRESS
+#define HAVE_NETDEV_STORAGE_ADDRESS
+#endif
+#ifndef HAVE_NETDEV_HW_ADDR
+#define HAVE_NETDEV_HW_ADDR
+#endif
+#ifndef HAVE_TRANS_START_IN_QUEUE
+#define HAVE_TRANS_START_IN_QUEUE
+#endif
+#endif /* < 2.6.31 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) )
+#undef netdev_tx_t
+#define netdev_tx_t int
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef NETIF_F_FCOE_MTU
+#define NETIF_F_FCOE_MTU       (1 << 26)
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+
+#ifndef pm_runtime_get_sync
+#define pm_runtime_get_sync(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_put
+#define pm_runtime_put(dev)		do {} while (0)
+#endif
+#ifndef pm_runtime_put_sync
+#define pm_runtime_put_sync(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_resume
+#define pm_runtime_resume(dev)		do {} while (0)
+#endif
+#ifndef pm_schedule_suspend
+#define pm_schedule_suspend(dev, t)	do {} while (0)
+#endif
+#ifndef pm_runtime_set_suspended
+#define pm_runtime_set_suspended(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_disable
+#define pm_runtime_disable(dev)		do {} while (0)
+#endif
+#ifndef pm_runtime_put_noidle
+#define pm_runtime_put_noidle(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_set_active
+#define pm_runtime_set_active(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_enable
+#define pm_runtime_enable(dev)	do {} while (0)
+#endif
+#ifndef pm_runtime_get_noresume
+#define pm_runtime_get_noresume(dev)	do {} while (0)
+#endif
+#else /* < 2.6.32 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE
+#define HAVE_NETDEV_OPS_FCOE_ENABLE
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_OPS_GETAPP
+#define HAVE_DCBNL_OPS_GETAPP
+#endif
+#endif /* CONFIG_DCB */
+#include <linux/pm_runtime.h>
+/* IOV bad DMA target work arounds require at least this kernel rev support */
+#define HAVE_PCIE_TYPE
+#endif /* < 2.6.32 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) )
+#ifndef pci_pcie_cap
+#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP)
+#endif
+#ifndef IPV4_FLOW
+#define IPV4_FLOW 0x10
+#endif /* IPV4_FLOW */
+#ifndef IPV6_FLOW
+#define IPV6_FLOW 0x11
+#endif /* IPV6_FLOW */
+/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */
+#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \
+      (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) )
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#endif /* RHEL6 or SLES11 SP1 */
+#ifndef __percpu
+#define __percpu
+#endif /* __percpu */
+#else /* < 2.6.33 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN
+#define HAVE_NETDEV_OPS_FCOE_GETWWN
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#define HAVE_ETHTOOL_SFP_DISPLAY_PORT
+#endif /* < 2.6.33 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
+#ifndef ETH_FLAG_NTUPLE
+#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE
+#endif
+
+#ifndef netdev_mc_count
+#define netdev_mc_count(dev) ((dev)->mc_count)
+#endif
+#ifndef netdev_mc_empty
+#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_mc_addr
+#define netdev_for_each_mc_addr(mclist, dev) \
+	for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+#endif
+#ifndef netdev_uc_count
+#define netdev_uc_count(dev) ((dev)->uc.count)
+#endif
+#ifndef netdev_uc_empty
+#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0)
+#endif
+#ifndef netdev_for_each_uc_addr
+#define netdev_for_each_uc_addr(ha, dev) \
+	list_for_each_entry(ha, &dev->uc.list, list)
+#endif
+#ifndef dma_set_coherent_mask
+#define dma_set_coherent_mask(dev,mask) \
+	pci_set_consistent_dma_mask(to_pci_dev(dev),(mask))
+#endif
+#ifndef pci_dev_run_wake
+#define pci_dev_run_wake(pdev)	(0)
+#endif
+
+/* netdev logging taken from include/linux/netdevice.h */
+#ifndef netdev_name
+static inline const char *_kc_netdev_name(const struct net_device *dev)
+{
+	if (dev->reg_state != NETREG_REGISTERED)
+		return "(unregistered net_device)";
+	return dev->name;
+}
+#define netdev_name(netdev)	_kc_netdev_name(netdev)
+#endif /* netdev_name */
+
+#undef netdev_printk
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
+#define netdev_printk(level, netdev, format, args...)		\
+do {								\
+	struct adapter_struct *kc_adapter = netdev_priv(netdev);\
+	struct pci_dev *pdev = kc_adapter->pdev;		\
+	printk("%s %s: " format, level, pci_name(pdev),		\
+	       ##args);						\
+} while(0)
+#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
+#define netdev_printk(level, netdev, format, args...)		\
+do {								\
+	struct adapter_struct *kc_adapter = netdev_priv(netdev);\
+	struct pci_dev *pdev = kc_adapter->pdev;		\
+	struct device *dev = pci_dev_to_dev(pdev);		\
+	dev_printk(level, dev, "%s: " format,			\
+		   netdev_name(netdev), ##args);		\
+} while(0)
+#else /* 2.6.21 => 2.6.34 */
+#define netdev_printk(level, netdev, format, args...)		\
+	dev_printk(level, (netdev)->dev.parent,			\
+		   "%s: " format,				\
+		   netdev_name(netdev), ##args)
+#endif /* <2.6.0 <2.6.21 <2.6.34 */
+#undef netdev_emerg
+#define netdev_emerg(dev, format, args...)			\
+	netdev_printk(KERN_EMERG, dev, format, ##args)
+#undef netdev_alert
+#define netdev_alert(dev, format, args...)			\
+	netdev_printk(KERN_ALERT, dev, format, ##args)
+#undef netdev_crit
+#define netdev_crit(dev, format, args...)			\
+	netdev_printk(KERN_CRIT, dev, format, ##args)
+#undef netdev_err
+#define netdev_err(dev, format, args...)			\
+	netdev_printk(KERN_ERR, dev, format, ##args)
+#undef netdev_warn
+#define netdev_warn(dev, format, args...)			\
+	netdev_printk(KERN_WARNING, dev, format, ##args)
+#undef netdev_notice
+#define netdev_notice(dev, format, args...)			\
+	netdev_printk(KERN_NOTICE, dev, format, ##args)
+#undef netdev_info
+#define netdev_info(dev, format, args...)			\
+	netdev_printk(KERN_INFO, dev, format, ##args)
+#undef netdev_dbg
+#if defined(DEBUG)
+#define netdev_dbg(__dev, format, args...)			\
+	netdev_printk(KERN_DEBUG, __dev, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define netdev_dbg(__dev, format, args...)			\
+do {								\
+	dynamic_dev_dbg((__dev)->dev.parent, "%s: " format,	\
+			netdev_name(__dev), ##args);		\
+} while (0)
+#else /* DEBUG */
+#define netdev_dbg(__dev, format, args...)			\
+({								\
+	if (0)							\
+		netdev_printk(KERN_DEBUG, __dev, format, ##args); \
+	0;							\
+})
+#endif /* DEBUG */
+
+#undef netif_printk
+#define netif_printk(priv, type, level, dev, fmt, args...)	\
+do {								\
+	if (netif_msg_##type(priv))				\
+		netdev_printk(level, (dev), fmt, ##args);	\
+} while (0)
+
+#undef netif_emerg
+#define netif_emerg(priv, type, dev, fmt, args...)		\
+	netif_level(emerg, priv, type, dev, fmt, ##args)
+#undef netif_alert
+#define netif_alert(priv, type, dev, fmt, args...)		\
+	netif_level(alert, priv, type, dev, fmt, ##args)
+#undef netif_crit
+#define netif_crit(priv, type, dev, fmt, args...)		\
+	netif_level(crit, priv, type, dev, fmt, ##args)
+#undef netif_err
+#define netif_err(priv, type, dev, fmt, args...)		\
+	netif_level(err, priv, type, dev, fmt, ##args)
+#undef netif_warn
+#define netif_warn(priv, type, dev, fmt, args...)		\
+	netif_level(warn, priv, type, dev, fmt, ##args)
+#undef netif_notice
+#define netif_notice(priv, type, dev, fmt, args...)		\
+	netif_level(notice, priv, type, dev, fmt, ##args)
+#undef netif_info
+#define netif_info(priv, type, dev, fmt, args...)		\
+	netif_level(info, priv, type, dev, fmt, ##args)
+
+#ifdef SET_SYSTEM_SLEEP_PM_OPS
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#endif
+
+#ifndef for_each_set_bit
+#define for_each_set_bit(bit, addr, size) \
+	for ((bit) = find_first_bit((addr), (size)); \
+		(bit) < (size); \
+		(bit) = find_next_bit((addr), (size), (bit) + 1))
+#endif /* for_each_set_bit */
+
+#ifndef DEFINE_DMA_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR
+#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN
+#define dma_unmap_addr pci_unmap_addr
+#define dma_unmap_addr_set pci_unmap_addr_set
+#define dma_unmap_len pci_unmap_len
+#define dma_unmap_len_set pci_unmap_len_set
+#endif /* DEFINE_DMA_UNMAP_ADDR */
+#else /* < 2.6.34 */
+#define HAVE_SYSTEM_SLEEP_PM_OPS
+#ifndef HAVE_SET_RX_MODE
+#define HAVE_SET_RX_MODE
+#endif
+
+#endif /* < 2.6.34 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
+#ifndef numa_node_id
+#define numa_node_id() 0
+#endif
+#ifdef HAVE_TX_MQ
+#include <net/sch_generic.h>
+#ifndef CONFIG_NETDEVICES_MULTIQUEUE
+void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int);
+#define netif_set_real_num_tx_queues  _kc_netif_set_real_num_tx_queues
+#else /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#define netif_set_real_num_tx_queues(_netdev, _count) \
+	do { \
+		(_netdev)->egress_subqueue_count = _count; \
+	} while (0)
+#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */
+#else
+#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0)
+#endif /* HAVE_TX_MQ */
+#ifndef ETH_FLAG_RXHASH
+#define ETH_FLAG_RXHASH (1<<28)
+#endif /* ETH_FLAG_RXHASH */
+#else /* < 2.6.35 */
+#define HAVE_PM_QOS_REQUEST_LIST
+#define HAVE_IRQ_AFFINITY_HINT
+#endif /* < 2.6.35 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
+extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32);
+#define ethtool_op_set_flags _kc_ethtool_op_set_flags
+extern u32 _kc_ethtool_op_get_flags(struct net_device *);
+#define ethtool_op_get_flags _kc_ethtool_op_get_flags
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#ifdef NET_IP_ALIGN
+#undef NET_IP_ALIGN
+#endif
+#define NET_IP_ALIGN 0
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#ifdef NET_SKB_PAD
+#undef NET_SKB_PAD
+#endif
+
+#if (L1_CACHE_BYTES > 32)
+#define NET_SKB_PAD L1_CACHE_BYTES
+#else
+#define NET_SKB_PAD 32
+#endif
+
+static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev,
+							    unsigned int length)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC);
+	if (skb) {
+#if (NET_IP_ALIGN + NET_SKB_PAD)
+		skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
+#endif
+		skb->dev = dev;
+	}
+	return skb;
+}
+
+#ifdef netdev_alloc_skb_ip_align
+#undef netdev_alloc_skb_ip_align
+#endif
+#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l)
+
+#undef netif_level
+#define netif_level(level, priv, type, dev, fmt, args...)	\
+do {								\
+	if (netif_msg_##type(priv))				\
+		netdev_##level(dev, fmt, ##args);		\
+} while (0)
+
+#undef usleep_range
+#define usleep_range(min, max)	msleep(DIV_ROUND_UP(min, 1000))
+
+#else /* < 2.6.36 */
+#define HAVE_PM_QOS_REQUEST_ACTIVE
+#define HAVE_8021P_SUPPORT
+#define HAVE_NDO_GET_STATS64
+#endif /* < 2.6.36 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) )
+#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR
+#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2)
+#endif
+#ifndef VLAN_N_VID
+#define VLAN_N_VID	VLAN_GROUP_ARRAY_LEN
+#endif /* VLAN_N_VID */
+#ifndef ETH_FLAG_TXVLAN
+#define ETH_FLAG_TXVLAN (1 << 7)
+#endif /* ETH_FLAG_TXVLAN */
+#ifndef ETH_FLAG_RXVLAN
+#define ETH_FLAG_RXVLAN (1 << 8)
+#endif /* ETH_FLAG_RXVLAN */
+
+static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb)
+{
+	WARN_ON(skb->ip_summed != CHECKSUM_NONE);
+}
+#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb)
+
+static inline void *_kc_vzalloc_node(unsigned long size, int node)
+{
+	void *addr = vmalloc_node(size, node);
+	if (addr)
+		memset(addr, 0, size);
+	return addr;
+}
+#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node)
+
+static inline void *_kc_vzalloc(unsigned long size)
+{
+	void *addr = vmalloc(size);
+	if (addr)
+		memset(addr, 0, size);
+	return addr;
+}
+#define vzalloc(_size) _kc_vzalloc(_size)
+
+#ifndef vlan_get_protocol
+static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb)
+{
+	if (vlan_tx_tag_present(skb) ||
+	    skb->protocol != cpu_to_be16(ETH_P_8021Q))
+		return skb->protocol;
+
+	if (skb_headlen(skb) < sizeof(struct vlan_ethhdr))
+		return 0;
+
+	return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto;
+}
+#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb)
+#endif
+#ifdef HAVE_HW_TIME_STAMP
+#define SKBTX_HW_TSTAMP (1 << 0)
+#define SKBTX_IN_PROGRESS (1 << 2)
+#define SKB_SHARED_TX_IS_UNION
+#endif
+#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) )
+#ifndef HAVE_VLAN_RX_REGISTER
+#define HAVE_VLAN_RX_REGISTER
+#endif
+#endif /* > 2.4.18 */
+#endif /* < 2.6.37 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) )
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
+#define skb_checksum_start_offset(skb) skb_transport_offset(skb)
+#else /* 2.6.22 -> 2.6.37 */
+static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb)
+{
+        return skb->csum_start - skb_headroom(skb);
+}
+#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb)
+#endif /* 2.6.22 -> 2.6.37 */
+#ifdef CONFIG_DCB
+#ifndef IEEE_8021QAZ_MAX_TCS
+#define IEEE_8021QAZ_MAX_TCS 8
+#endif
+#ifndef DCB_CAP_DCBX_HOST
+#define DCB_CAP_DCBX_HOST		0x01
+#endif
+#ifndef DCB_CAP_DCBX_LLD_MANAGED
+#define DCB_CAP_DCBX_LLD_MANAGED	0x02
+#endif
+#ifndef DCB_CAP_DCBX_VER_CEE
+#define DCB_CAP_DCBX_VER_CEE		0x04
+#endif
+#ifndef DCB_CAP_DCBX_VER_IEEE
+#define DCB_CAP_DCBX_VER_IEEE		0x08
+#endif
+#ifndef DCB_CAP_DCBX_STATIC
+#define DCB_CAP_DCBX_STATIC		0x10
+#endif
+#endif /* CONFIG_DCB */
+#else /* < 2.6.38 */
+#endif /* < 2.6.38 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
+#ifndef skb_queue_reverse_walk_safe
+#define skb_queue_reverse_walk_safe(queue, skb, tmp)				\
+		for (skb = (queue)->prev, tmp = skb->prev;			\
+		     skb != (struct sk_buff *)(queue);				\
+		     skb = tmp, tmp = skb->prev)
+#endif
+#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
+extern u8 _kc_netdev_get_num_tc(struct net_device *dev);
+#define netdev_get_num_tc(dev) _kc_netdev_get_num_tc(dev)
+extern u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up);
+#define netdev_get_prio_tc_map(dev, up) _kc_netdev_get_prio_tc_map(dev, up)
+#define netdev_set_prio_tc_map(dev, up, tc) do {} while (0)
+#else /* RHEL6.1 or greater */
+#ifndef HAVE_MQPRIO
+#define HAVE_MQPRIO
+#endif /* HAVE_MQPRIO */
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_IEEE
+#define HAVE_DCBNL_IEEE
+#ifndef IEEE_8021QAZ_TSA_STRICT
+#define IEEE_8021QAZ_TSA_STRICT		0
+#endif
+#ifndef IEEE_8021QAZ_TSA_ETS
+#define IEEE_8021QAZ_TSA_ETS		2
+#endif
+#ifndef IEEE_8021QAZ_APP_SEL_ETHERTYPE
+#define IEEE_8021QAZ_APP_SEL_ETHERTYPE	1
+#endif
+#endif
+#endif /* CONFIG_DCB */
+#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
+#else /* < 2.6.39 */
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET
+#endif
+#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */
+#ifndef HAVE_MQPRIO
+#define HAVE_MQPRIO
+#endif
+#ifndef HAVE_SETUP_TC
+#define HAVE_SETUP_TC
+#endif
+#ifdef CONFIG_DCB
+#ifndef HAVE_DCBNL_IEEE
+#define HAVE_DCBNL_IEEE
+#endif
+#endif /* CONFIG_DCB */
+#ifndef HAVE_NDO_SET_FEATURES
+#define HAVE_NDO_SET_FEATURES
+#endif
+#endif /* < 2.6.39 */
+
+/*****************************************************************************/
+/* use < 2.6.40 because of a Fedora 15 kernel update where they
+ * updated the kernel version to 2.6.40.x and they back-ported 3.0 features
+ * like set_phys_id for ethtool.
+ */
+#undef ETHTOOL_GRXRINGS
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) )
+#ifdef ETHTOOL_GRXRINGS
+#ifndef FLOW_EXT
+#define	FLOW_EXT	0x80000000
+union _kc_ethtool_flow_union {
+	struct ethtool_tcpip4_spec		tcp_ip4_spec;
+	struct ethtool_usrip4_spec		usr_ip4_spec;
+	__u8					hdata[60];
+};
+struct _kc_ethtool_flow_ext {
+	__be16	vlan_etype;
+	__be16	vlan_tci;
+	__be32	data[2];
+};
+struct _kc_ethtool_rx_flow_spec {
+	__u32		flow_type;
+	union _kc_ethtool_flow_union h_u;
+	struct _kc_ethtool_flow_ext h_ext;
+	union _kc_ethtool_flow_union m_u;
+	struct _kc_ethtool_flow_ext m_ext;
+	__u64		ring_cookie;
+	__u32		location;
+};
+#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec
+#endif /* FLOW_EXT */
+#endif
+
+#define pci_disable_link_state_locked pci_disable_link_state
+
+#ifndef PCI_LTR_VALUE_MASK
+#define  PCI_LTR_VALUE_MASK	0x000003ff
+#endif
+#ifndef PCI_LTR_SCALE_MASK
+#define  PCI_LTR_SCALE_MASK	0x00001c00
+#endif
+#ifndef PCI_LTR_SCALE_SHIFT
+#define  PCI_LTR_SCALE_SHIFT	10
+#endif
+
+#else /* < 2.6.40 */
+#define HAVE_ETHTOOL_SET_PHYS_ID
+#endif /* < 2.6.40 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) )
+#ifndef __netdev_alloc_skb_ip_align
+#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l)
+#endif /* __netdev_alloc_skb_ip_align */
+#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app)
+#define dcb_ieee_delapp(dev, app) 0
+#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority)
+#else /* < 3.1.0 */
+#ifndef HAVE_DCBNL_IEEE_DELAPP
+#define HAVE_DCBNL_IEEE_DELAPP
+#endif
+#endif /* < 3.1.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) )
+#ifdef ETHTOOL_GRXRINGS
+#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS
+#endif /* ETHTOOL_GRXRINGS */
+
+#ifndef skb_frag_size
+#define skb_frag_size(frag)	_kc_skb_frag_size(frag)
+static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag)
+{
+	return frag->size;
+}
+#endif /* skb_frag_size */
+
+#ifndef skb_frag_size_sub
+#define skb_frag_size_sub(frag, delta)	_kc_skb_frag_size_sub(frag, delta)
+static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta)
+{
+	frag->size -= delta;
+}
+#endif /* skb_frag_size_sub */
+
+#ifndef skb_frag_page
+#define skb_frag_page(frag)	_kc_skb_frag_page(frag)
+static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag)
+{
+	return frag->page;
+}
+#endif /* skb_frag_page */
+
+#ifndef skb_frag_address
+#define skb_frag_address(frag)	_kc_skb_frag_address(frag)
+static inline void *_kc_skb_frag_address(const skb_frag_t *frag)
+{
+	return page_address(skb_frag_page(frag)) + frag->page_offset;
+}
+#endif /* skb_frag_address */
+
+#ifndef skb_frag_dma_map
+#define skb_frag_dma_map(dev,frag,offset,size,dir) \
+		_kc_skb_frag_dma_map(dev,frag,offset,size,dir)
+static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev,
+					      const skb_frag_t *frag,
+					      size_t offset, size_t size,
+					      enum dma_data_direction dir)
+{
+	return dma_map_page(dev, skb_frag_page(frag),
+			    frag->page_offset + offset, size, dir);
+}
+#endif /* skb_frag_dma_map */
+
+#ifndef __skb_frag_unref
+#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag)
+static inline void __kc_skb_frag_unref(skb_frag_t *frag)
+{
+	put_page(skb_frag_page(frag));
+}
+#endif /* __skb_frag_unref */
+#else /* < 3.2.0 */
+#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_PCI_DEV_FLAGS_ASSIGNED
+#define HAVE_VF_SPOOFCHK_CONFIGURE
+#endif
+#endif /* < 3.2.0 */
+
+#if (RHEL_RELEASE_CODE && \
+	(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) && \
+	(RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))
+#undef ixgbe_get_netdev_tc_txq
+#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc])
+#endif
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) )
+typedef u32 kni_netdev_features_t;
+#else /* ! < 3.3.0 */
+typedef netdev_features_t kni_netdev_features_t;
+#define HAVE_INT_NDO_VLAN_RX_ADD_VID
+#ifdef ETHTOOL_SRXNTUPLE
+#undef ETHTOOL_SRXNTUPLE
+#endif
+#endif /* < 3.3.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
+#ifndef NETIF_F_RXFCS
+#define NETIF_F_RXFCS	0
+#endif /* NETIF_F_RXFCS */
+#ifndef NETIF_F_RXALL
+#define NETIF_F_RXALL	0
+#endif /* NETIF_F_RXALL */
+
+#define NUMTCS_RETURNS_U8
+
+
+#endif /* < 3.4.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) )
+static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2)
+{
+	return !compare_ether_addr(addr1, addr2);
+}
+#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2))
+#else
+#define HAVE_FDB_OPS
+#endif /* < 3.5.0 */
+
+/*****************************************************************************/
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) )
+#define NETIF_F_HW_VLAN_TX     NETIF_F_HW_VLAN_CTAG_TX
+#define NETIF_F_HW_VLAN_RX     NETIF_F_HW_VLAN_CTAG_RX
+#define NETIF_F_HW_VLAN_FILTER NETIF_F_HW_VLAN_CTAG_FILTER
+#endif /* >= 3.10.0 */
+
+#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
+#ifdef CONFIG_PCI_IOV
+extern int __kc_pci_vfs_assigned(struct pci_dev *dev);
+#else
+static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
+{
+        return 0;
+}
+#endif
+#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev)
+
+#endif
+
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) )
+#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
+#endif /* >= 3.16.0 */
+
+#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
new file mode 100644
index 00000000..a0e5cb6b
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_dev.h
@@ -0,0 +1,149 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#ifndef _KNI_DEV_H_
+#define _KNI_DEV_H_
+
+#include <linux/if.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#ifdef RTE_KNI_VHOST
+#include <net/sock.h>
+#endif
+
+#include <exec-env/rte_kni_common.h>
+#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+
+/**
+ * A structure describing the private information for a kni device.
+ */
+
+struct kni_dev {
+	/* kni list */
+	struct list_head list;
+
+	struct net_device_stats stats;
+	int status;
+	uint16_t group_id;           /* Group ID of a group of KNI devices */
+	unsigned core_id;            /* Core ID to bind */
+	char name[RTE_KNI_NAMESIZE]; /* Network device name */
+	struct task_struct *pthread;
+
+	/* wait queue for req/resp */
+	wait_queue_head_t wq;
+	struct mutex sync_lock;
+
+	/* PCI device id */
+	uint16_t device_id;
+
+	/* kni device */
+	struct net_device *net_dev;
+	struct net_device *lad_dev;
+	struct pci_dev *pci_dev;
+
+	/* queue for packets to be sent out */
+	void *tx_q;
+
+	/* queue for the packets received */
+	void *rx_q;
+
+	/* queue for the allocated mbufs those can be used to save sk buffs */
+	void *alloc_q;
+
+	/* free queue for the mbufs to be freed */
+	void *free_q;
+
+	/* request queue */
+	void *req_q;
+
+	/* response queue */
+	void *resp_q;
+
+	void * sync_kva;
+	void *sync_va;
+
+	void *mbuf_kva;
+	void *mbuf_va;
+
+	/* mbuf size */
+	unsigned mbuf_size;
+
+	/* synchro for request processing */
+	unsigned long synchro;
+
+#ifdef RTE_KNI_VHOST
+	struct kni_vhost_queue* vhost_queue;
+	volatile enum {
+		BE_STOP = 0x1,
+		BE_START = 0x2,
+		BE_FINISH = 0x4,
+	}vq_status;
+#endif
+};
+
+#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args)
+#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args)
+#ifdef RTE_KNI_KO_DEBUG
+	#define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args)
+#else
+	#define KNI_DBG(args...)
+#endif
+
+#ifdef RTE_KNI_VHOST
+unsigned int
+kni_poll(struct file *file, struct socket *sock, poll_table * wait);
+int kni_chk_vhost_rx(struct kni_dev *kni);
+int kni_vhost_init(struct kni_dev *kni);
+int kni_vhost_backend_release(struct kni_dev *kni);
+
+struct kni_vhost_queue {
+	struct sock sk;
+	struct socket *sock;
+	int vnet_hdr_sz;
+	struct kni_dev *kni;
+	int sockfd;
+	unsigned int flags;
+	struct sk_buff* cache;
+	struct rte_kni_fifo* fifo;
+};
+
+#endif
+
+#ifdef RTE_KNI_VHOST_DEBUG_RX
+	#define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args)
+#else
+	#define KNI_DBG_RX(args...)
+#endif
+
+#ifdef RTE_KNI_VHOST_DEBUG_TX
+	#define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args)
+#else
+	#define KNI_DBG_TX(args...)
+#endif
+
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
new file mode 100644
index 00000000..06b6d463
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
@@ -0,0 +1,217 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include "kni_dev.h"
+
+static int
+kni_check_if_running(struct net_device *dev)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	if (priv->lad_dev)
+		return 0;
+	else
+		return -EOPNOTSUPP;
+}
+
+static void
+kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
+}
+
+static int
+kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
+}
+
+static int
+kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
+}
+
+static void
+kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol);
+}
+
+static int
+kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol);
+}
+
+static int
+kni_nway_reset(struct net_device *dev)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev);
+}
+
+static int
+kni_get_eeprom_len(struct net_device *dev)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev);
+}
+
+static int
+kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
+							u8 *bytes)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom,
+								bytes);
+}
+
+static int
+kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
+							u8 *bytes)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom,
+								bytes);
+}
+
+static void
+kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring);
+}
+
+static int
+kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring);
+}
+
+static void
+kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause);
+}
+
+static int
+kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev,
+								pause);
+}
+
+static u32
+kni_get_msglevel(struct net_device *dev)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev);
+}
+
+static void
+kni_set_msglevel(struct net_device *dev, u32 data)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data);
+}
+
+static int
+kni_get_regs_len(struct net_device *dev)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev);
+}
+
+static void
+kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p);
+}
+
+static void
+kni_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset,
+								data);
+}
+
+static int
+kni_get_sset_count(struct net_device *dev, int sset)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset);
+}
+
+static void
+kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
+								u64 *data)
+{
+	struct kni_dev *priv = netdev_priv(dev);
+	priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats,
+								data);
+}
+
+struct ethtool_ops kni_ethtool_ops = {
+	.begin 				= kni_check_if_running,
+	.get_drvinfo		= kni_get_drvinfo,
+	.get_settings		= kni_get_settings,
+	.set_settings		= kni_set_settings,
+	.get_regs_len		= kni_get_regs_len,
+	.get_regs			= kni_get_regs,
+	.get_wol			= kni_get_wol,
+	.set_wol			= kni_set_wol,
+	.nway_reset			= kni_nway_reset,
+	.get_link			= ethtool_op_get_link,
+	.get_eeprom_len		= kni_get_eeprom_len,
+	.get_eeprom			= kni_get_eeprom,
+	.set_eeprom			= kni_set_eeprom,
+	.get_ringparam		= kni_get_ringparam,
+	.set_ringparam		= kni_set_ringparam,
+	.get_pauseparam		= kni_get_pauseparam,
+	.set_pauseparam		= kni_set_pauseparam,
+	.get_msglevel		= kni_get_msglevel,
+	.set_msglevel		= kni_set_msglevel,
+	.get_strings		= kni_get_strings,
+	.get_sset_count		= kni_get_sset_count,
+	.get_ethtool_stats  = kni_get_ethtool_stats,
+};
+
+void
+kni_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &kni_ethtool_ops;
+}
diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h
new file mode 100644
index 00000000..3ea750e2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h
@@ -0,0 +1,108 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#ifndef _KNI_FIFO_H_
+#define _KNI_FIFO_H_
+
+#include <exec-env/rte_kni_common.h>
+
+/**
+ * Adds num elements into the fifo. Return the number actually written
+ */
+static inline unsigned
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
+{
+	unsigned i = 0;
+	unsigned fifo_write = fifo->write;
+	unsigned fifo_read = fifo->read;
+	unsigned new_write = fifo_write;
+
+	for (i = 0; i < num; i++) {
+		new_write = (new_write + 1) & (fifo->len - 1);
+
+		if (new_write == fifo_read)
+			break;
+		fifo->buffer[fifo_write] = data[i];
+		fifo_write = new_write;
+	}
+	fifo->write = fifo_write;
+
+	return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actully read
+ */
+static inline unsigned
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
+{
+	unsigned i = 0;
+	unsigned new_read = fifo->read;
+	unsigned fifo_write = fifo->write;
+
+	for (i = 0; i < num; i++) {
+		if (new_read == fifo_write)
+			break;
+
+		data[i] = fifo->buffer[new_read];
+		new_read = (new_read + 1) & (fifo->len - 1);
+	}
+	fifo->read = new_read;
+
+	return i;
+}
+
+/**
+ * Get the num of elements in the fifo
+ */
+static inline unsigned
+kni_fifo_count(struct rte_kni_fifo *fifo)
+{
+	return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1);
+}
+
+/**
+ * Get the num of available elements in the fifo
+ */
+static inline unsigned
+kni_fifo_free_count(struct rte_kni_fifo *fifo)
+{
+	return (fifo->read - fifo->write - 1) & (fifo->len - 1);
+}
+
+#ifdef RTE_KNI_VHOST
+/**
+ * Initializes the kni fifo structure
+ */
+static inline void
+kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
+{
+	fifo->write = 0;
+	fifo->read = 0;
+	fifo->len = size;
+	fifo->elem_size = sizeof(void *);
+}
+#endif
+
+#endif /* _KNI_FIFO_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
new file mode 100644
index 00000000..ae8133f3
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -0,0 +1,688 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/rwsem.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include <exec-env/rte_kni_common.h>
+#include "kni_dev.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for managing kni devices");
+
+#define KNI_RX_LOOP_NUM 1000
+
+#define KNI_MAX_DEVICES 32
+
+extern void kni_net_rx(struct kni_dev *kni);
+extern void kni_net_init(struct net_device *dev);
+extern void kni_net_config_lo_mode(char *lo_str);
+extern void kni_net_poll_resp(struct kni_dev *kni);
+extern void kni_set_ethtool_ops(struct net_device *netdev);
+
+extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+extern void ixgbe_kni_remove(struct pci_dev *pdev);
+extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+extern void igb_kni_remove(struct pci_dev *pdev);
+
+static int kni_open(struct inode *inode, struct file *file);
+static int kni_release(struct inode *inode, struct file *file);
+static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
+					unsigned long ioctl_param);
+static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
+						unsigned long ioctl_param);
+static int kni_dev_remove(struct kni_dev *dev);
+
+static int __init kni_parse_kthread_mode(void);
+
+/* KNI processing for single kernel thread mode */
+static int kni_thread_single(void *unused);
+/* KNI processing for multiple kernel thread mode */
+static int kni_thread_multiple(void *param);
+
+static struct file_operations kni_fops = {
+	.owner = THIS_MODULE,
+	.open = kni_open,
+	.release = kni_release,
+	.unlocked_ioctl = (void *)kni_ioctl,
+	.compat_ioctl = (void *)kni_compat_ioctl,
+};
+
+static struct miscdevice kni_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = KNI_DEVICE,
+	.fops = &kni_fops,
+};
+
+/* loopback mode */
+static char *lo_mode = NULL;
+
+/* Kernel thread mode */
+static char *kthread_mode = NULL;
+static unsigned multiple_kthread_on = 0;
+
+#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
+
+static int kni_net_id;
+
+struct kni_net {
+	unsigned long device_in_use; /* device in use flag */
+	struct task_struct *kni_kthread;
+	struct rw_semaphore kni_list_lock;
+	struct list_head kni_list_head;
+};
+
+static int __net_init kni_init_net(struct net *net)
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+	struct kni_net *knet = net_generic(net, kni_net_id);
+#else
+	struct kni_net *knet;
+	int ret;
+
+	knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL);
+	if (!knet) {
+		ret = -ENOMEM;
+		return ret;
+	}
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+
+	/* Clear the bit of device in use */
+	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+	init_rwsem(&knet->kni_list_lock);
+	INIT_LIST_HEAD(&knet->kni_list_head);
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+	return 0;
+#else
+	ret = net_assign_generic(net, kni_net_id, knet);
+	if (ret < 0)
+		kfree(knet);
+
+	return ret;
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+}
+
+static void __net_exit kni_exit_net(struct net *net)
+{
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
+	struct kni_net *knet = net_generic(net, kni_net_id);
+
+	kfree(knet);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+}
+
+static struct pernet_operations kni_net_ops = {
+	.init = kni_init_net,
+	.exit = kni_exit_net,
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+	.id   = &kni_net_id,
+	.size = sizeof(struct kni_net),
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+};
+
+static int __init
+kni_init(void)
+{
+	int rc;
+
+	KNI_PRINT("######## DPDK kni module loading ########\n");
+
+	if (kni_parse_kthread_mode() < 0) {
+		KNI_ERR("Invalid parameter for kthread_mode\n");
+		return -EINVAL;
+	}
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+	rc = register_pernet_subsys(&kni_net_ops);
+#else
+	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+	if (rc)
+		return -EPERM;
+
+	rc = misc_register(&kni_misc);
+	if (rc != 0) {
+		KNI_ERR("Misc registration failed\n");
+		goto out;
+	}
+
+	/* Configure the lo mode according to the input parameter */
+	kni_net_config_lo_mode(lo_mode);
+
+	KNI_PRINT("######## DPDK kni module loaded  ########\n");
+
+	return 0;
+
+out:
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+	unregister_pernet_subsys(&kni_net_ops);
+#else
+	register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+	return rc;
+}
+
+static void __exit
+kni_exit(void)
+{
+	misc_deregister(&kni_misc);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+	unregister_pernet_subsys(&kni_net_ops);
+#else
+	register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+	KNI_PRINT("####### DPDK kni module unloaded  #######\n");
+}
+
+static int __init
+kni_parse_kthread_mode(void)
+{
+	if (!kthread_mode)
+		return 0;
+
+	if (strcmp(kthread_mode, "single") == 0)
+		return 0;
+	else if (strcmp(kthread_mode, "multiple") == 0)
+		multiple_kthread_on = 1;
+	else
+		return -1;
+
+	return 0;
+}
+
+static int
+kni_open(struct inode *inode, struct file *file)
+{
+	struct net *net = current->nsproxy->net_ns;
+	struct kni_net *knet = net_generic(net, kni_net_id);
+
+	/* kni device can be opened by one user only per netns */
+	if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
+		return -EBUSY;
+
+	/* Create kernel thread for single mode */
+	if (multiple_kthread_on == 0) {
+		KNI_PRINT("Single kernel thread for all KNI devices\n");
+		/* Create kernel thread for RX */
+		knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
+						"kni_single");
+		if (IS_ERR(knet->kni_kthread)) {
+			KNI_ERR("Unable to create kernel threaed\n");
+			return PTR_ERR(knet->kni_kthread);
+		}
+	} else
+		KNI_PRINT("Multiple kernel thread mode enabled\n");
+
+	file->private_data = get_net(net);
+	KNI_PRINT("/dev/kni opened\n");
+
+	return 0;
+}
+
+static int
+kni_release(struct inode *inode, struct file *file)
+{
+	struct net *net = file->private_data;
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	struct kni_dev *dev, *n;
+
+	/* Stop kernel thread for single mode */
+	if (multiple_kthread_on == 0) {
+		/* Stop kernel thread */
+		kthread_stop(knet->kni_kthread);
+		knet->kni_kthread = NULL;
+	}
+
+	down_write(&knet->kni_list_lock);
+	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+		/* Stop kernel thread for multiple mode */
+		if (multiple_kthread_on && dev->pthread != NULL) {
+			kthread_stop(dev->pthread);
+			dev->pthread = NULL;
+		}
+
+#ifdef RTE_KNI_VHOST
+		kni_vhost_backend_release(dev);
+#endif
+		kni_dev_remove(dev);
+		list_del(&dev->list);
+	}
+	up_write(&knet->kni_list_lock);
+
+	/* Clear the bit of device in use */
+	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+
+	put_net(net);
+	KNI_PRINT("/dev/kni closed\n");
+
+	return 0;
+}
+
+static int
+kni_thread_single(void *data)
+{
+	struct kni_net *knet = data;
+	int j;
+	struct kni_dev *dev;
+
+	while (!kthread_should_stop()) {
+		down_read(&knet->kni_list_lock);
+		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+			list_for_each_entry(dev, &knet->kni_list_head, list) {
+#ifdef RTE_KNI_VHOST
+				kni_chk_vhost_rx(dev);
+#else
+				kni_net_rx(dev);
+#endif
+				kni_net_poll_resp(dev);
+			}
+		}
+		up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+		/* reschedule out for a while */
+		schedule_timeout_interruptible(usecs_to_jiffies( \
+				KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
+
+	return 0;
+}
+
+static int
+kni_thread_multiple(void *param)
+{
+	int j;
+	struct kni_dev *dev = (struct kni_dev *)param;
+
+	while (!kthread_should_stop()) {
+		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+#ifdef RTE_KNI_VHOST
+			kni_chk_vhost_rx(dev);
+#else
+			kni_net_rx(dev);
+#endif
+			kni_net_poll_resp(dev);
+		}
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+		schedule_timeout_interruptible(usecs_to_jiffies( \
+				KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
+
+	return 0;
+}
+
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+	if (!dev)
+		return -ENODEV;
+
+	switch (dev->device_id) {
+	#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
+	#include <rte_pci_dev_ids.h>
+		igb_kni_remove(dev->pci_dev);
+		break;
+	#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
+	#include <rte_pci_dev_ids.h>
+		ixgbe_kni_remove(dev->pci_dev);
+		break;
+	default:
+		break;
+	}
+
+	if (dev->net_dev) {
+		unregister_netdev(dev->net_dev);
+		free_netdev(dev->net_dev);
+	}
+
+	return 0;
+}
+
+static int
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
+{
+	if (!kni || !dev)
+		return -1;
+
+	/* Check if network name has been used */
+	if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
+		KNI_ERR("KNI name %s duplicated\n", dev->name);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+kni_ioctl_create(struct net *net,
+		unsigned int ioctl_num, unsigned long ioctl_param)
+{
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	int ret;
+	struct rte_kni_device_info dev_info;
+	struct pci_dev *pci = NULL;
+	struct pci_dev *found_pci = NULL;
+	struct net_device *net_dev = NULL;
+	struct net_device *lad_dev = NULL;
+	struct kni_dev *kni, *dev, *n;
+
+	printk(KERN_INFO "KNI: Creating kni...\n");
+	/* Check the buffer size, to avoid warning */
+	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+		return -EINVAL;
+
+	/* Copy kni info from user space */
+	ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
+	if (ret) {
+		KNI_ERR("copy_from_user in kni_ioctl_create");
+		return -EIO;
+	}
+
+	/**
+	 * Check if the cpu core id is valid for binding,
+	 * for multiple kernel thread mode.
+	 */
+	if (multiple_kthread_on && dev_info.force_bind &&
+				!cpu_online(dev_info.core_id)) {
+		KNI_ERR("cpu %u is not online\n", dev_info.core_id);
+		return -EINVAL;
+	}
+
+	/* Check if it has been created */
+	down_read(&knet->kni_list_lock);
+	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+		if (kni_check_param(dev, &dev_info) < 0) {
+			up_read(&knet->kni_list_lock);
+			return -EINVAL;
+		}
+	}
+	up_read(&knet->kni_list_lock);
+
+	net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
+#ifdef NET_NAME_UNKNOWN
+							NET_NAME_UNKNOWN,
+#endif
+							kni_net_init);
+	if (net_dev == NULL) {
+		KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
+		return -EBUSY;
+	}
+
+	dev_net_set(net_dev, net);
+
+	kni = netdev_priv(net_dev);
+
+	kni->net_dev = net_dev;
+	kni->group_id = dev_info.group_id;
+	kni->core_id = dev_info.core_id;
+	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
+
+	/* Translate user space info into kernel space info */
+	kni->tx_q = phys_to_virt(dev_info.tx_phys);
+	kni->rx_q = phys_to_virt(dev_info.rx_phys);
+	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+	kni->free_q = phys_to_virt(dev_info.free_phys);
+
+	kni->req_q = phys_to_virt(dev_info.req_phys);
+	kni->resp_q = phys_to_virt(dev_info.resp_phys);
+	kni->sync_va = dev_info.sync_va;
+	kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+
+	kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
+	kni->mbuf_va = dev_info.mbuf_va;
+
+#ifdef RTE_KNI_VHOST
+	kni->vhost_queue = NULL;
+	kni->vq_status = BE_STOP;
+#endif
+	kni->mbuf_size = dev_info.mbuf_size;
+
+	KNI_PRINT("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
+		(unsigned long long) dev_info.tx_phys, kni->tx_q);
+	KNI_PRINT("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
+		(unsigned long long) dev_info.rx_phys, kni->rx_q);
+	KNI_PRINT("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
+		(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
+	KNI_PRINT("free_phys:    0x%016llx, free_q addr:    0x%p\n",
+		(unsigned long long) dev_info.free_phys, kni->free_q);
+	KNI_PRINT("req_phys:     0x%016llx, req_q addr:     0x%p\n",
+		(unsigned long long) dev_info.req_phys, kni->req_q);
+	KNI_PRINT("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
+		(unsigned long long) dev_info.resp_phys, kni->resp_q);
+	KNI_PRINT("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p\n",
+		(unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
+	KNI_PRINT("mbuf_va:      0x%p\n", dev_info.mbuf_va);
+	KNI_PRINT("mbuf_size:    %u\n", kni->mbuf_size);
+
+	KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
+					dev_info.bus,
+					dev_info.devid,
+					dev_info.function,
+					dev_info.vendor_id,
+					dev_info.device_id);
+
+	pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
+
+	/* Support Ethtool */
+	while (pci) {
+		KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
+					pci->bus->number,
+					PCI_SLOT(pci->devfn),
+					PCI_FUNC(pci->devfn));
+
+		if ((pci->bus->number == dev_info.bus) &&
+			(PCI_SLOT(pci->devfn) == dev_info.devid) &&
+			(PCI_FUNC(pci->devfn) == dev_info.function)) {
+			found_pci = pci;
+			switch (dev_info.device_id) {
+			#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
+			#include <rte_pci_dev_ids.h>
+				ret = igb_kni_probe(found_pci, &lad_dev);
+				break;
+			#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
+							case (dev):
+			#include <rte_pci_dev_ids.h>
+				ret = ixgbe_kni_probe(found_pci, &lad_dev);
+				break;
+			default:
+				ret = -1;
+				break;
+			}
+
+			KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
+							pci, lad_dev);
+			if (ret == 0) {
+				kni->lad_dev = lad_dev;
+				kni_set_ethtool_ops(kni->net_dev);
+			} else {
+				KNI_ERR("Device not supported by ethtool");
+				kni->lad_dev = NULL;
+			}
+
+			kni->pci_dev = found_pci;
+			kni->device_id = dev_info.device_id;
+			break;
+		}
+		pci = pci_get_device(dev_info.vendor_id,
+				dev_info.device_id, pci);
+	}
+	if (pci)
+		pci_dev_put(pci);
+
+	ret = register_netdev(net_dev);
+	if (ret) {
+		KNI_ERR("error %i registering device \"%s\"\n",
+					ret, dev_info.name);
+		kni_dev_remove(kni);
+		return -ENODEV;
+	}
+
+#ifdef RTE_KNI_VHOST
+	kni_vhost_init(kni);
+#endif
+
+	/**
+	 * Create a new kernel thread for multiple mode, set its core affinity,
+	 * and finally wake it up.
+	 */
+	if (multiple_kthread_on) {
+		kni->pthread = kthread_create(kni_thread_multiple,
+					      (void *)kni,
+					      "kni_%s", kni->name);
+		if (IS_ERR(kni->pthread)) {
+			kni_dev_remove(kni);
+			return -ECANCELED;
+		}
+		if (dev_info.force_bind)
+			kthread_bind(kni->pthread, kni->core_id);
+		wake_up_process(kni->pthread);
+	}
+
+	down_write(&knet->kni_list_lock);
+	list_add(&kni->list, &knet->kni_list_head);
+	up_write(&knet->kni_list_lock);
+
+	return 0;
+}
+
+static int
+kni_ioctl_release(struct net *net,
+		unsigned int ioctl_num, unsigned long ioctl_param)
+{
+	struct kni_net *knet = net_generic(net, kni_net_id);
+	int ret = -EINVAL;
+	struct kni_dev *dev, *n;
+	struct rte_kni_device_info dev_info;
+
+	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
+			return -EINVAL;
+
+	ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
+	if (ret) {
+		KNI_ERR("copy_from_user in kni_ioctl_release");
+		return -EIO;
+	}
+
+	/* Release the network device according to its name */
+	if (strlen(dev_info.name) == 0)
+		return ret;
+
+	down_write(&knet->kni_list_lock);
+	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
+		if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
+			continue;
+
+		if (multiple_kthread_on && dev->pthread != NULL) {
+			kthread_stop(dev->pthread);
+			dev->pthread = NULL;
+		}
+
+#ifdef RTE_KNI_VHOST
+		kni_vhost_backend_release(dev);
+#endif
+		kni_dev_remove(dev);
+		list_del(&dev->list);
+		ret = 0;
+		break;
+	}
+	up_write(&knet->kni_list_lock);
+	printk(KERN_INFO "KNI: %s release kni named %s\n",
+		(ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
+
+	return ret;
+}
+
+static int
+kni_ioctl(struct inode *inode,
+	unsigned int ioctl_num,
+	unsigned long ioctl_param)
+{
+	int ret = -EINVAL;
+	struct net *net = current->nsproxy->net_ns;
+
+	KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+
+	/*
+	 * Switch according to the ioctl called
+	 */
+	switch (_IOC_NR(ioctl_num)) {
+	case _IOC_NR(RTE_KNI_IOCTL_TEST):
+		/* For test only, not used */
+		break;
+	case _IOC_NR(RTE_KNI_IOCTL_CREATE):
+		ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
+		break;
+	case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
+		ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
+		break;
+	default:
+		KNI_DBG("IOCTL default\n");
+		break;
+	}
+
+	return ret;
+}
+
+static int
+kni_compat_ioctl(struct inode *inode,
+		unsigned int ioctl_num,
+		unsigned long ioctl_param)
+{
+	/* 32 bits app on 64 bits OS to be supported later */
+	KNI_PRINT("Not implemented.\n");
+
+	return -EINVAL;
+}
+
+module_init(kni_init);
+module_exit(kni_exit);
+
+module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(lo_mode,
+"KNI loopback mode (default=lo_mode_none):\n"
+"    lo_mode_none        Kernel loopback disabled\n"
+"    lo_mode_fifo        Enable kernel loopback with fifo\n"
+"    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
+"\n"
+);
+
+module_param(kthread_mode, charp, S_IRUGO);
+MODULE_PARM_DESC(kthread_mode,
+"Kernel thread mode (default=single):\n"
+"    single    Single kernel thread mode enabled.\n"
+"    multiple  Multiple kernel thread mode enabled.\n"
+"\n"
+);
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
new file mode 100644
index 00000000..cfa83398
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -0,0 +1,706 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+/*
+ * This code is inspired from the book "Linux Device Drivers" by
+ * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/skbuff.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+#include <exec-env/rte_kni_common.h>
+#include <kni_fifo.h>
+#include "kni_dev.h"
+
+#define WD_TIMEOUT 5 /*jiffies */
+
+#define MBUF_BURST_SZ 32
+
+#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
+
+/* typedef for rx function */
+typedef void (*kni_net_rx_t)(struct kni_dev *kni);
+
+static int kni_net_tx(struct sk_buff *skb, struct net_device *dev);
+static void kni_net_rx_normal(struct kni_dev *kni);
+static void kni_net_rx_lo_fifo(struct kni_dev *kni);
+static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni);
+static int kni_net_process_request(struct kni_dev *kni,
+			struct rte_kni_request *req);
+
+/* kni rx function pointer, with default to normal rx */
+static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+
+/*
+ * Open and close
+ */
+static int
+kni_net_open(struct net_device *dev)
+{
+	int ret;
+	struct rte_kni_request req;
+	struct kni_dev *kni = netdev_priv(dev);
+
+	if (kni->lad_dev)
+		memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+	else
+		/*
+		 * Generate random mac address. eth_random_addr() is the newer
+		 * version of generating mac address in linux kernel.
+		 */
+		random_ether_addr(dev->dev_addr);
+
+	netif_start_queue(dev);
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+
+	/* Setting if_up to non-zero means up */
+	req.if_up = 1;
+	ret = kni_net_process_request(kni, &req);
+
+	return (ret == 0) ? req.result : ret;
+}
+
+static int
+kni_net_release(struct net_device *dev)
+{
+	int ret;
+	struct rte_kni_request req;
+	struct kni_dev *kni = netdev_priv(dev);
+
+	netif_stop_queue(dev); /* can't transmit any more */
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
+
+	/* Setting if_up to 0 means down */
+	req.if_up = 0;
+	ret = kni_net_process_request(kni, &req);
+
+	return (ret == 0) ? req.result : ret;
+}
+
+/*
+ * Configuration changes (passed on by ifconfig)
+ */
+static int
+kni_net_config(struct net_device *dev, struct ifmap *map)
+{
+	if (dev->flags & IFF_UP) /* can't act on a running interface */
+		return -EBUSY;
+
+	/* ignore other fields */
+	return 0;
+}
+
+/*
+ * RX: normal working mode
+ */
+static void
+kni_net_rx_normal(struct kni_dev *kni)
+{
+	unsigned ret;
+	uint32_t len;
+	unsigned i, num_rx, num_fq;
+	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *va[MBUF_BURST_SZ];
+	void * data_kva;
+
+	struct sk_buff *skb;
+	struct net_device *dev = kni->net_dev;
+
+	/* Get the number of free entries in free_q */
+	num_fq = kni_fifo_free_count(kni->free_q);
+	if (num_fq == 0) {
+		/* No room on the free_q, bail out */
+		return;
+	}
+
+	/* Calculate the number of entries to dequeue from rx_q */
+	num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ);
+
+	/* Burst dequeue from rx_q */
+	num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx);
+	if (num_rx == 0)
+		return;
+
+	/* Transfer received packets to netif */
+	for (i = 0; i < num_rx; i++) {
+		kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+		len = kva->data_len;
+		data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va
+				+ kni->mbuf_kva;
+
+		skb = dev_alloc_skb(len + 2);
+		if (!skb) {
+			KNI_ERR("Out of mem, dropping pkts\n");
+			/* Update statistics */
+			kni->stats.rx_dropped++;
+		}
+		else {
+			/* Align IP on 16B boundary */
+			skb_reserve(skb, 2);
+			memcpy(skb_put(skb, len), data_kva, len);
+			skb->dev = dev;
+			skb->protocol = eth_type_trans(skb, dev);
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+			/* Call netif interface */
+			netif_rx_ni(skb);
+
+			/* Update statistics */
+			kni->stats.rx_bytes += len;
+			kni->stats.rx_packets++;
+		}
+	}
+
+	/* Burst enqueue mbufs into free_q */
+	ret = kni_fifo_put(kni->free_q, (void **)va, num_rx);
+	if (ret != num_rx)
+		/* Failing should not happen */
+		KNI_ERR("Fail to enqueue entries into free_q\n");
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos.
+ */
+static void
+kni_net_rx_lo_fifo(struct kni_dev *kni)
+{
+	unsigned ret;
+	uint32_t len;
+	unsigned i, num, num_rq, num_tq, num_aq, num_fq;
+	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *va[MBUF_BURST_SZ];
+	void * data_kva;
+
+	struct rte_kni_mbuf *alloc_kva;
+	struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ];
+	void *alloc_data_kva;
+
+	/* Get the number of entries in rx_q */
+	num_rq = kni_fifo_count(kni->rx_q);
+
+	/* Get the number of free entrie in tx_q */
+	num_tq = kni_fifo_free_count(kni->tx_q);
+
+	/* Get the number of entries in alloc_q */
+	num_aq = kni_fifo_count(kni->alloc_q);
+
+	/* Get the number of free entries in free_q */
+	num_fq = kni_fifo_free_count(kni->free_q);
+
+	/* Calculate the number of entries to be dequeued from rx_q */
+	num = min(num_rq, num_tq);
+	num = min(num, num_aq);
+	num = min(num, num_fq);
+	num = min(num, (unsigned)MBUF_BURST_SZ);
+
+	/* Return if no entry to dequeue from rx_q */
+	if (num == 0)
+		return;
+
+	/* Burst dequeue from rx_q */
+	ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+	if (ret == 0)
+		return; /* Failing should not happen */
+
+	/* Dequeue entries from alloc_q */
+	ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num);
+	if (ret) {
+		num = ret;
+		/* Copy mbufs */
+		for (i = 0; i < num; i++) {
+			kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+			len = kva->pkt_len;
+			data_kva = kva->buf_addr + kva->data_off -
+					kni->mbuf_va + kni->mbuf_kva;
+
+			alloc_kva = (void *)alloc_va[i] - kni->mbuf_va +
+							kni->mbuf_kva;
+			alloc_data_kva = alloc_kva->buf_addr +
+					alloc_kva->data_off - kni->mbuf_va +
+							kni->mbuf_kva;
+			memcpy(alloc_data_kva, data_kva, len);
+			alloc_kva->pkt_len = len;
+			alloc_kva->data_len = len;
+
+			kni->stats.tx_bytes += len;
+			kni->stats.rx_bytes += len;
+		}
+
+		/* Burst enqueue mbufs into tx_q */
+		ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num);
+		if (ret != num)
+			/* Failing should not happen */
+			KNI_ERR("Fail to enqueue mbufs into tx_q\n");
+	}
+
+	/* Burst enqueue mbufs into free_q */
+	ret = kni_fifo_put(kni->free_q, (void **)va, num);
+	if (ret != num)
+		/* Failing should not happen */
+		KNI_ERR("Fail to enqueue mbufs into free_q\n");
+
+	/**
+	 * Update statistic, and enqueue/dequeue failure is impossible,
+	 * as all queues are checked at first.
+	 */
+	kni->stats.tx_packets += num;
+	kni->stats.rx_packets += num;
+}
+
+/*
+ * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
+ */
+static void
+kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
+{
+	unsigned ret;
+	uint32_t len;
+	unsigned i, num_rq, num_fq, num;
+	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *va[MBUF_BURST_SZ];
+	void * data_kva;
+
+	struct sk_buff *skb;
+	struct net_device *dev = kni->net_dev;
+
+	/* Get the number of entries in rx_q */
+	num_rq = kni_fifo_count(kni->rx_q);
+
+	/* Get the number of free entries in free_q */
+	num_fq = kni_fifo_free_count(kni->free_q);
+
+	/* Calculate the number of entries to dequeue from rx_q */
+	num = min(num_rq, num_fq);
+	num = min(num, (unsigned)MBUF_BURST_SZ);
+
+	/* Return if no entry to dequeue from rx_q */
+	if (num == 0)
+		return;
+
+	/* Burst dequeue mbufs from rx_q */
+	ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+	if (ret == 0)
+		return;
+
+	/* Copy mbufs to sk buffer and then call tx interface */
+	for (i = 0; i < num; i++) {
+		kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+		len = kva->data_len;
+		data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va +
+				kni->mbuf_kva;
+
+		skb = dev_alloc_skb(len + 2);
+		if (skb == NULL)
+			KNI_ERR("Out of mem, dropping pkts\n");
+		else {
+			/* Align IP on 16B boundary */
+			skb_reserve(skb, 2);
+			memcpy(skb_put(skb, len), data_kva, len);
+			skb->dev = dev;
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			dev_kfree_skb(skb);
+		}
+
+		/* Simulate real usage, allocate/copy skb twice */
+		skb = dev_alloc_skb(len + 2);
+		if (skb == NULL) {
+			KNI_ERR("Out of mem, dropping pkts\n");
+			kni->stats.rx_dropped++;
+		}
+		else {
+			/* Align IP on 16B boundary */
+			skb_reserve(skb, 2);
+			memcpy(skb_put(skb, len), data_kva, len);
+			skb->dev = dev;
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+			kni->stats.rx_bytes += len;
+			kni->stats.rx_packets++;
+
+			/* call tx interface */
+			kni_net_tx(skb, dev);
+		}
+	}
+
+	/* enqueue all the mbufs from rx_q into free_q */
+	ret = kni_fifo_put(kni->free_q, (void **)&va, num);
+	if (ret != num)
+		/* Failing should not happen */
+		KNI_ERR("Fail to enqueue mbufs into free_q\n");
+}
+
+/* rx interface */
+void
+kni_net_rx(struct kni_dev *kni)
+{
+	/**
+	 * It doesn't need to check if it is NULL pointer,
+	 * as it has a default value
+	 */
+	(*kni_net_rx_func)(kni);
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ */
+#ifdef RTE_KNI_VHOST
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+
+	dev_kfree_skb(skb);
+	kni->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
+}
+#else
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	int len = 0;
+	unsigned ret;
+	struct kni_dev *kni = netdev_priv(dev);
+	struct rte_kni_mbuf *pkt_kva = NULL;
+	struct rte_kni_mbuf *pkt_va = NULL;
+
+	dev->trans_start = jiffies; /* save the timestamp */
+
+	/* Check if the length of skb is less than mbuf size */
+	if (skb->len > kni->mbuf_size)
+		goto drop;
+
+	/**
+	 * Check if it has at least one free entry in tx_q and
+	 * one entry in alloc_q.
+	 */
+	if (kni_fifo_free_count(kni->tx_q) == 0 ||
+			kni_fifo_count(kni->alloc_q) == 0) {
+		/**
+		 * If no free entry in tx_q or no entry in alloc_q,
+		 * drops skb and goes out.
+		 */
+		goto drop;
+	}
+
+	/* dequeue a mbuf from alloc_q */
+	ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
+	if (likely(ret == 1)) {
+		void *data_kva;
+
+		pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
+		data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va
+				+ kni->mbuf_kva;
+
+		len = skb->len;
+		memcpy(data_kva, skb->data, len);
+		if (unlikely(len < ETH_ZLEN)) {
+			memset(data_kva + len, 0, ETH_ZLEN - len);
+			len = ETH_ZLEN;
+		}
+		pkt_kva->pkt_len = len;
+		pkt_kva->data_len = len;
+
+		/* enqueue mbuf into tx_q */
+		ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
+		if (unlikely(ret != 1)) {
+			/* Failing should not happen */
+			KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+			goto drop;
+		}
+	} else {
+		/* Failing should not happen */
+		KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+		goto drop;
+	}
+
+	/* Free skb and update statistics */
+	dev_kfree_skb(skb);
+	kni->stats.tx_bytes += len;
+	kni->stats.tx_packets++;
+
+	return NETDEV_TX_OK;
+
+drop:
+	/* Free skb and update statistics */
+	dev_kfree_skb(skb);
+	kni->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
+}
+#endif
+
+/*
+ * Deal with a transmit timeout.
+ */
+static void
+kni_net_tx_timeout (struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+
+	KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies,
+			jiffies - dev->trans_start);
+
+	kni->stats.tx_errors++;
+	netif_wake_queue(dev);
+	return;
+}
+
+/*
+ * Ioctl commands
+ */
+static int
+kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	KNI_DBG("kni_net_ioctl %d\n",
+		((struct kni_dev *)netdev_priv(dev))->group_id);
+
+	return 0;
+}
+
+static void
+kni_net_set_rx_mode(struct net_device *dev)
+{
+}
+
+static int
+kni_net_change_mtu(struct net_device *dev, int new_mtu)
+{
+	int ret;
+	struct rte_kni_request req;
+	struct kni_dev *kni = netdev_priv(dev);
+
+	KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+
+	memset(&req, 0, sizeof(req));
+	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
+	req.new_mtu = new_mtu;
+	ret = kni_net_process_request(kni, &req);
+	if (ret == 0 && req.result == 0)
+		dev->mtu = new_mtu;
+
+	return (ret == 0) ? req.result : ret;
+}
+
+/*
+ * Checks if the user space application provided the resp message
+ */
+void
+kni_net_poll_resp(struct kni_dev *kni)
+{
+	if (kni_fifo_count(kni->resp_q))
+		wake_up_interruptible(&kni->wq);
+}
+
+/*
+ * It can be called to process the request.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+	int ret = -1;
+	void *resp_va;
+	unsigned num;
+	int ret_val;
+
+	if (!kni || !req) {
+		KNI_ERR("No kni instance or request\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&kni->sync_lock);
+
+	/* Construct data */
+	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+	num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+	if (num < 1) {
+		KNI_ERR("Cannot send to req_q\n");
+		ret = -EBUSY;
+		goto fail;
+	}
+
+	ret_val = wait_event_interruptible_timeout(kni->wq,
+			kni_fifo_count(kni->resp_q), 3 * HZ);
+	if (signal_pending(current) || ret_val <= 0) {
+		ret = -ETIME;
+		goto fail;
+	}
+	num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+	if (num != 1 || resp_va != kni->sync_va) {
+		/* This should never happen */
+		KNI_ERR("No data in resp_q\n");
+		ret = -ENODATA;
+		goto fail;
+	}
+
+	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+	ret = 0;
+
+fail:
+	mutex_unlock(&kni->sync_lock);
+	return ret;
+}
+
+/*
+ * Return statistics to the caller
+ */
+static struct net_device_stats *
+kni_net_stats(struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+	return &kni->stats;
+}
+
+/*
+ *  Fill the eth header
+ */
+static int
+kni_net_header(struct sk_buff *skb, struct net_device *dev,
+		unsigned short type, const void *daddr,
+		const void *saddr, unsigned int len)
+{
+	struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+
+	memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
+	memcpy(eth->h_dest,   daddr ? daddr : dev->dev_addr, dev->addr_len);
+	eth->h_proto = htons(type);
+
+	return dev->hard_header_len;
+}
+
+
+/*
+ * Re-fill the eth header
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0))
+static int
+kni_net_rebuild_header(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct ethhdr *eth = (struct ethhdr *) skb->data;
+
+	memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+	memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);
+
+	return 0;
+}
+#endif /* < 4.1.0  */
+
+/**
+ * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int kni_net_set_mac(struct net_device *netdev, void *p)
+{
+	struct sockaddr *addr = p;
+	if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
+		return -EADDRNOTAVAIL;
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	return 0;
+}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+{
+	if (new_carrier)
+		netif_carrier_on(dev);
+	else
+		netif_carrier_off(dev);
+	return 0;
+}
+#endif
+
+static const struct header_ops kni_net_header_ops = {
+	.create  = kni_net_header,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0))
+	.rebuild = kni_net_rebuild_header,
+#endif /* < 4.1.0  */
+	.cache   = NULL,  /* disable caching */
+};
+
+static const struct net_device_ops kni_net_netdev_ops = {
+	.ndo_open = kni_net_open,
+	.ndo_stop = kni_net_release,
+	.ndo_set_config = kni_net_config,
+	.ndo_start_xmit = kni_net_tx,
+	.ndo_change_mtu = kni_net_change_mtu,
+	.ndo_do_ioctl = kni_net_ioctl,
+	.ndo_set_rx_mode = kni_net_set_rx_mode,
+	.ndo_get_stats = kni_net_stats,
+	.ndo_tx_timeout = kni_net_tx_timeout,
+	.ndo_set_mac_address = kni_net_set_mac,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+	.ndo_change_carrier = kni_net_change_carrier,
+#endif
+};
+
+void
+kni_net_init(struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+
+	KNI_DBG("kni_net_init\n");
+
+	init_waitqueue_head(&kni->wq);
+	mutex_init(&kni->sync_lock);
+
+	ether_setup(dev); /* assign some of the fields */
+	dev->netdev_ops      = &kni_net_netdev_ops;
+	dev->header_ops      = &kni_net_header_ops;
+	dev->watchdog_timeo = WD_TIMEOUT;
+}
+
+void
+kni_net_config_lo_mode(char *lo_str)
+{
+	if (!lo_str) {
+		KNI_PRINT("loopback disabled");
+		return;
+	}
+
+	if (!strcmp(lo_str, "lo_mode_none"))
+		KNI_PRINT("loopback disabled");
+	else if (!strcmp(lo_str, "lo_mode_fifo")) {
+		KNI_PRINT("loopback mode=lo_mode_fifo enabled");
+		kni_net_rx_func = kni_net_rx_lo_fifo;
+	} else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
+		KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled");
+		kni_net_rx_func = kni_net_rx_lo_fifo_skb;
+	} else
+		KNI_PRINT("Incognizant parameter, loopback disabled");
+}
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c
new file mode 100644
index 00000000..a3ca8499
--- /dev/null
+++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c
@@ -0,0 +1,857 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/sock.h>
+#include <linux/virtio_net.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/if_tun.h>
+#include <linux/version.h>
+
+#include "compat.h"
+#include "kni_dev.h"
+#include "kni_fifo.h"
+
+#define RX_BURST_SZ 4
+
+extern void put_unused_fd(unsigned int fd);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
+extern struct file*
+sock_alloc_file(struct socket *sock,
+		int flags, const char *dname);
+
+extern int get_unused_fd_flags(unsigned flags);
+
+extern void fd_install(unsigned int fd, struct file *file);
+
+static int kni_sock_map_fd(struct socket *sock)
+{
+	struct file *file;
+	int fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	file = sock_alloc_file(sock, 0, NULL);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return PTR_ERR(file);
+	}
+	fd_install(fd, file);
+	return fd;
+}
+#else
+#define kni_sock_map_fd(s)             sock_map_fd(s, 0)
+#endif
+
+static struct proto kni_raw_proto = {
+	.name = "kni_vhost",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct kni_vhost_queue),
+};
+
+static inline int
+kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
+		 unsigned offset, unsigned len)
+{
+	struct rte_kni_mbuf *pkt_kva = NULL;
+	struct rte_kni_mbuf *pkt_va = NULL;
+	int ret;
+
+	KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+		   offset, len, (int)m->msg_iter.iov->iov_len);
+#else
+		   offset, len, (int)m->msg_iov->iov_len);
+#endif
+
+	/**
+	 * Check if it has at least one free entry in tx_q and
+	 * one entry in alloc_q.
+	 */
+	if (kni_fifo_free_count(kni->tx_q) == 0 ||
+	    kni_fifo_count(kni->alloc_q) == 0) {
+		/**
+		 * If no free entry in tx_q or no entry in alloc_q,
+		 * drops skb and goes out.
+		 */
+		goto drop;
+	}
+
+	/* dequeue a mbuf from alloc_q */
+	ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
+	if (likely(ret == 1)) {
+		void *data_kva;
+
+		pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
+		data_kva = pkt_kva->buf_addr + pkt_kva->data_off
+		           - kni->mbuf_va + kni->mbuf_kva;
+
+#ifdef HAVE_IOV_ITER_MSGHDR
+		copy_from_iter(data_kva, len, &m->msg_iter);
+#else
+		memcpy_fromiovecend(data_kva, m->msg_iov, offset, len);
+#endif
+
+		if (unlikely(len < ETH_ZLEN)) {
+			memset(data_kva + len, 0, ETH_ZLEN - len);
+			len = ETH_ZLEN;
+		}
+		pkt_kva->pkt_len = len;
+		pkt_kva->data_len = len;
+
+		/* enqueue mbuf into tx_q */
+		ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
+		if (unlikely(ret != 1)) {
+			/* Failing should not happen */
+			KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+			goto drop;
+		}
+	} else {
+		/* Failing should not happen */
+		KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+		goto drop;
+	}
+
+	/* update statistics */
+	kni->stats.tx_bytes += len;
+	kni->stats.tx_packets++;
+
+	return 0;
+
+drop:
+	/* update statistics */
+	kni->stats.tx_dropped++;
+
+	return 0;
+}
+
+static inline int
+kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
+		 unsigned offset, unsigned len)
+{
+	uint32_t pkt_len;
+	struct rte_kni_mbuf *kva;
+	struct rte_kni_mbuf *va;
+	void * data_kva;
+	struct sk_buff *skb;
+	struct kni_vhost_queue *q = kni->vhost_queue;
+
+	if (unlikely(q == NULL))
+		return 0;
+
+	/* ensure at least one entry in free_q */
+	if (unlikely(kni_fifo_free_count(kni->free_q) == 0))
+		return 0;
+
+	skb = skb_dequeue(&q->sk.sk_receive_queue);
+	if (unlikely(skb == NULL))
+		return 0;
+
+	kva = (struct rte_kni_mbuf*)skb->data;
+
+	/* free skb to cache */
+	skb->data = NULL;
+	if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1)))
+		/* Failing should not happen */
+		KNI_ERR("Fail to enqueue entries into rx cache fifo\n");
+
+	pkt_len = kva->data_len;
+	if (unlikely(pkt_len > len))
+		goto drop;
+
+	KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+		   offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
+#else
+		   offset, len, pkt_len, (int)m->msg_iov->iov_len);
+#endif
+
+	data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;
+#ifdef HAVE_IOV_ITER_MSGHDR
+	if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter)))
+#else
+	if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len)))
+#endif
+		goto drop;
+
+	/* Update statistics */
+	kni->stats.rx_bytes += pkt_len;
+	kni->stats.rx_packets++;
+
+	/* enqueue mbufs into free_q */
+	va = (void*)kva - kni->mbuf_kva + kni->mbuf_va;
+	if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1)))
+		/* Failing should not happen */
+		KNI_ERR("Fail to enqueue entries into free_q\n");
+
+	KNI_DBG_RX("receive done %d\n", pkt_len);
+
+	return pkt_len;
+
+drop:
+	/* Update drop statistics */
+	kni->stats.rx_dropped++;
+
+	return 0;
+}
+
+static unsigned int
+kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
+{
+	struct kni_vhost_queue *q =
+		container_of(sock->sk, struct kni_vhost_queue, sk);
+	struct kni_dev *kni;
+	unsigned int mask = 0;
+
+	if (unlikely(q == NULL || q->kni == NULL))
+		return POLLERR;
+
+	kni = q->kni;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
+	KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n",
+		  kni->group_id, (uint64_t)sock->wq);
+#else
+	KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n",
+		  kni->group_id, (uint64_t)&sock->wait);
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
+	poll_wait(file, &sock->wq->wait, wait);
+#else
+	poll_wait(file, &sock->wait, wait);
+#endif
+
+	if (kni_fifo_count(kni->rx_q) > 0)
+		mask |= POLLIN | POLLRDNORM;
+
+	if (sock_writeable(&q->sk) ||
+#ifdef SOCKWQ_ASYNC_NOSPACE
+	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+#else
+	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+#endif
+	     sock_writeable(&q->sk)))
+		mask |= POLLOUT | POLLWRNORM;
+
+	return mask;
+}
+
+static inline void
+kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
+		  struct sk_buff *skb, struct rte_kni_mbuf *va)
+{
+	struct rte_kni_mbuf *kva;
+
+	kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
+	(skb)->data = (unsigned char*)kva;
+	(skb)->len = kva->data_len;
+	skb_queue_tail(&q->sk.sk_receive_queue, skb);
+}
+
+static inline void
+kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
+	  struct sk_buff **skb, struct rte_kni_mbuf **va)
+{
+	int i;
+	for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
+		kni_vhost_enqueue(kni, q, *skb, *va);
+}
+
+int
+kni_chk_vhost_rx(struct kni_dev *kni)
+{
+	struct kni_vhost_queue *q = kni->vhost_queue;
+	unsigned nb_in, nb_mbuf, nb_skb;
+	const unsigned BURST_MASK = RX_BURST_SZ - 1;
+	unsigned nb_burst, nb_backlog, i;
+	struct sk_buff *skb[RX_BURST_SZ];
+	struct rte_kni_mbuf *va[RX_BURST_SZ];
+
+	if (unlikely(BE_STOP & kni->vq_status)) {
+		kni->vq_status |= BE_FINISH;
+		return 0;
+	}
+
+	if (unlikely(q == NULL))
+		return 0;
+
+	nb_skb = kni_fifo_count(q->fifo);
+	nb_mbuf = kni_fifo_count(kni->rx_q);
+
+	nb_in = min(nb_mbuf, nb_skb);
+	nb_in = min(nb_in, (unsigned)RX_BURST_SZ);
+	nb_burst   = (nb_in & ~BURST_MASK);
+	nb_backlog = (nb_in & BURST_MASK);
+
+	/* enqueue skb_queue per BURST_SIZE bulk */
+	if (0 != nb_burst) {
+		if (unlikely(RX_BURST_SZ != kni_fifo_get(
+				     kni->rx_q, (void **)&va,
+				     RX_BURST_SZ)))
+			goto except;
+
+		if (unlikely(RX_BURST_SZ != kni_fifo_get(
+				     q->fifo, (void **)&skb,
+				     RX_BURST_SZ)))
+			goto except;
+
+		kni_vhost_enqueue_burst(kni, q, skb, va);
+	}
+
+	/* all leftover, do one by one */
+	for (i = 0; i < nb_backlog; ++i) {
+		if (unlikely(1 != kni_fifo_get(
+				     kni->rx_q,(void **)&va, 1)))
+			goto except;
+
+		if (unlikely(1 != kni_fifo_get(
+				     q->fifo, (void **)&skb, 1)))
+			goto except;
+
+		kni_vhost_enqueue(kni, q, *skb, *va);
+	}
+
+	/* Ondemand wake up */
+	if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) ||
+	    ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
+		wake_up_interruptible_poll(sk_sleep(&q->sk),
+				   POLLIN | POLLRDNORM | POLLRDBAND);
+		KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
+			   nb_mbuf, nb_skb, nb_in);
+	}
+
+	return 0;
+
+except:
+	/* Failing should not happen */
+	KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n");
+	BUG_ON(1);
+
+	return 0;
+}
+
+static int
+#ifdef HAVE_KIOCB_MSG_PARAM
+kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
+	   struct msghdr *m, size_t total_len)
+#else
+kni_sock_sndmsg(struct socket *sock,
+	   struct msghdr *m, size_t total_len)
+#endif /* HAVE_KIOCB_MSG_PARAM */
+{
+	struct kni_vhost_queue *q =
+		container_of(sock->sk, struct kni_vhost_queue, sk);
+	int vnet_hdr_len = 0;
+	unsigned long len = total_len;
+
+	if (unlikely(q == NULL || q->kni == NULL))
+		return 0;
+
+	KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
+#ifdef HAVE_IOV_ITER_MSGHDR
+		   len, q->flags, (int)m->msg_iter.iov->iov_len);
+#else
+		   len, q->flags, (int)m->msg_iovlen);
+#endif
+
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+	if (likely(q->flags & IFF_VNET_HDR)) {
+		vnet_hdr_len = q->vnet_hdr_sz;
+		if (unlikely(len < vnet_hdr_len))
+			return -EINVAL;
+		len -= vnet_hdr_len;
+	}
+#endif
+
+	if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
+		return -EINVAL;
+
+	return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len);
+}
+
+static int
+#ifdef HAVE_KIOCB_MSG_PARAM
+kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
+	   struct msghdr *m, size_t len, int flags)
+#else
+kni_sock_rcvmsg(struct socket *sock,
+	   struct msghdr *m, size_t len, int flags)
+#endif /* HAVE_KIOCB_MSG_PARAM */
+{
+	int vnet_hdr_len = 0;
+	int pkt_len = 0;
+	struct kni_vhost_queue *q =
+		container_of(sock->sk, struct kni_vhost_queue, sk);
+	static struct virtio_net_hdr
+		__attribute__ ((unused)) vnet_hdr = {
+		.flags = 0,
+		.gso_type = VIRTIO_NET_HDR_GSO_NONE
+	};
+
+	if (unlikely(q == NULL || q->kni == NULL))
+		return 0;
+
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+	if (likely(q->flags & IFF_VNET_HDR)) {
+		vnet_hdr_len = q->vnet_hdr_sz;
+		if ((len -= vnet_hdr_len) < 0)
+			return -EINVAL;
+	}
+#endif
+
+	if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
+		m, vnet_hdr_len, len))))
+		return 0;
+
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+	/* no need to copy hdr when no pkt received */
+#ifdef HAVE_IOV_ITER_MSGHDR
+	if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len,
+		&m->msg_iter)))
+#else
+	if (unlikely(memcpy_toiovecend(m->msg_iov,
+		(void *)&vnet_hdr, 0, vnet_hdr_len)))
+#endif /* HAVE_IOV_ITER_MSGHDR */
+		return -EFAULT;
+#endif /* RTE_KNI_VHOST_VNET_HDR_EN */
+	KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
+		   (unsigned long)len, q->flags, pkt_len);
+
+	return pkt_len + vnet_hdr_len;
+}
+
+/* dummy tap like ioctl */
+static int
+kni_sock_ioctl(struct socket *sock, unsigned int cmd,
+	      unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct ifreq __user *ifr = argp;
+	unsigned int __user *up = argp;
+	struct kni_vhost_queue *q =
+		container_of(sock->sk, struct kni_vhost_queue, sk);
+	struct kni_dev *kni;
+	unsigned int u;
+	int __user *sp = argp;
+	int s;
+	int ret;
+
+	KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);
+
+	switch (cmd) {
+	case TUNSETIFF:
+		KNI_DBG("TUNSETIFF\n");
+		/* ignore the name, just look at flags */
+		if (get_user(u, &ifr->ifr_flags))
+			return -EFAULT;
+
+		ret = 0;
+		if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
+			ret = -EINVAL;
+		else
+			q->flags = u;
+
+		return ret;
+
+	case TUNGETIFF:
+		KNI_DBG("TUNGETIFF\n");
+		rcu_read_lock_bh();
+		kni = rcu_dereference_bh(q->kni);
+		if (kni)
+			dev_hold(kni->net_dev);
+		rcu_read_unlock_bh();
+
+		if (!kni)
+			return -ENOLINK;
+
+		ret = 0;
+		if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
+		    put_user(q->flags, &ifr->ifr_flags))
+			ret = -EFAULT;
+		dev_put(kni->net_dev);
+		return ret;
+
+	case TUNGETFEATURES:
+		KNI_DBG("TUNGETFEATURES\n");
+		u = IFF_TAP | IFF_NO_PI;
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+		u |= IFF_VNET_HDR;
+#endif
+		if (put_user(u, up))
+			return -EFAULT;
+		return 0;
+
+	case TUNSETSNDBUF:
+		KNI_DBG("TUNSETSNDBUF\n");
+		if (get_user(u, up))
+			return -EFAULT;
+
+		q->sk.sk_sndbuf = u;
+		return 0;
+
+	case TUNGETVNETHDRSZ:
+		s = q->vnet_hdr_sz;
+		if (put_user(s, sp))
+			return -EFAULT;
+		KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
+		return 0;
+
+	case TUNSETVNETHDRSZ:
+		if (get_user(s, sp))
+			return -EFAULT;
+		if (s < (int)sizeof(struct virtio_net_hdr))
+			return -EINVAL;
+
+		KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
+		q->vnet_hdr_sz = s;
+		return 0;
+
+	case TUNSETOFFLOAD:
+		KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+		/* not support any offload yet */
+		if (!(q->flags & IFF_VNET_HDR))
+			return  -EINVAL;
+
+		return 0;
+#else
+		return -EINVAL;
+#endif
+
+	default:
+		KNI_DBG("NOT SUPPORT\n");
+		return -EINVAL;
+	}
+}
+
+static int
+kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd,
+		     unsigned long arg)
+{
+	/* 32 bits app on 64 bits OS to be supported later */
+	KNI_PRINT("Not implemented.\n");
+
+	return -EINVAL;
+}
+
+#define KNI_VHOST_WAIT_WQ_SAFE()                        \
+do {		                                	\
+	while ((BE_FINISH | BE_STOP) == kni->vq_status) \
+		msleep(1);                              \
+}while(0)                                               \
+
+
+static int
+kni_sock_release(struct socket *sock)
+{
+	struct kni_vhost_queue *q =
+		container_of(sock->sk, struct kni_vhost_queue, sk);
+	struct kni_dev *kni;
+
+	if (q == NULL)
+		return 0;
+
+	if (NULL != (kni = q->kni)) {
+		kni->vq_status = BE_STOP;
+		KNI_VHOST_WAIT_WQ_SAFE();
+		kni->vhost_queue = NULL;
+		q->kni = NULL;
+	}
+
+	if (q->sockfd != -1)
+		q->sockfd = -1;
+
+	sk_set_socket(&q->sk, NULL);
+	sock->sk = NULL;
+
+	sock_put(&q->sk);
+
+	KNI_DBG("dummy sock release done\n");
+
+	return 0;
+}
+
+int
+kni_sock_getname (struct socket *sock,
+		  struct sockaddr *addr,
+		  int *sockaddr_len, int peer)
+{
+	KNI_DBG("dummy sock getname\n");
+	((struct sockaddr_ll*)addr)->sll_family = AF_PACKET;
+	return 0;
+}
+
+static const struct proto_ops kni_socket_ops = {
+	.getname = kni_sock_getname,
+	.sendmsg = kni_sock_sndmsg,
+	.recvmsg = kni_sock_rcvmsg,
+	.release = kni_sock_release,
+	.poll    = kni_sock_poll,
+	.ioctl   = kni_sock_ioctl,
+	.compat_ioctl = kni_sock_compat_ioctl,
+};
+
+static void
+kni_sk_write_space(struct sock *sk)
+{
+	wait_queue_head_t *wqueue;
+
+	if (!sock_writeable(sk) ||
+#ifdef SOCKWQ_ASYNC_NOSPACE
+	    !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
+#else
+	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+#endif
+		return;
+	wqueue = sk_sleep(sk);
+	if (wqueue && waitqueue_active(wqueue))
+		wake_up_interruptible_poll(
+			wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
+}
+
+static void
+kni_sk_destruct(struct sock *sk)
+{
+	struct kni_vhost_queue *q =
+		container_of(sk, struct kni_vhost_queue, sk);
+
+	if (!q)
+		return;
+
+	/* make sure there's no packet in buffer */
+	while (skb_dequeue(&sk->sk_receive_queue) != NULL)
+	       ;
+
+	mb();
+
+	if (q->fifo != NULL) {
+		kfree(q->fifo);
+		q->fifo = NULL;
+	}
+
+	if (q->cache != NULL) {
+		kfree(q->cache);
+		q->cache = NULL;
+	}
+}
+
+static int
+kni_vhost_backend_init(struct kni_dev *kni)
+{
+	struct kni_vhost_queue *q;
+	struct net *net = current->nsproxy->net_ns;
+	int err, i, sockfd;
+	struct rte_kni_fifo *fifo;
+	struct sk_buff *elem;
+
+	if (kni->vhost_queue != NULL)
+		return -1;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+	q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+			&kni_raw_proto, 0);
+#else
+	q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+			&kni_raw_proto);
+#endif
+	if (!q)
+		return -ENOMEM;
+
+	err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock);
+	if (err)
+		goto free_sk;
+
+	sockfd = kni_sock_map_fd(q->sock);
+	if (sockfd < 0) {
+		err = sockfd;
+		goto free_sock;
+	}
+
+	/* cache init */
+	q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
+			   GFP_KERNEL);
+	if (!q->cache)
+		goto free_fd;
+
+	fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *)
+			+ sizeof(struct rte_kni_fifo), GFP_KERNEL);
+	if (!fifo)
+		goto free_cache;
+
+	kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE);
+
+	for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
+		elem = &q->cache[i];
+		kni_fifo_put(fifo, (void**)&elem, 1);
+	}
+	q->fifo = fifo;
+
+	/* store sockfd in vhost_queue */
+	q->sockfd = sockfd;
+
+	/* init socket */
+	q->sock->type = SOCK_RAW;
+	q->sock->state = SS_CONNECTED;
+	q->sock->ops = &kni_socket_ops;
+	sock_init_data(q->sock, &q->sk);
+
+	/* init sock data */
+	q->sk.sk_write_space = kni_sk_write_space;
+	q->sk.sk_destruct = kni_sk_destruct;
+	q->flags = IFF_NO_PI | IFF_TAP;
+	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+#ifdef RTE_KNI_VHOST_VNET_HDR_EN
+	q->flags |= IFF_VNET_HDR;
+#endif
+
+	/* bind kni_dev with vhost_queue */
+	q->kni = kni;
+	kni->vhost_queue = q;
+
+	wmb();
+
+	kni->vq_status = BE_START;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
+	KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx,"
+		  "sk->sk_wq=0x%16llx",
+		  q->sockfd, (uint64_t)q->sock->wq,
+		  (uint64_t)q->sk.sk_wq);
+#else
+	KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx,"
+		  "sk->sk_sleep=0x%16llx",
+		  q->sockfd, (uint64_t)&q->sock->wait,
+		  (uint64_t)q->sk.sk_sleep);
+#endif
+
+	return 0;
+
+free_cache:
+	kfree(q->cache);
+	q->cache = NULL;
+
+free_fd:
+	put_unused_fd(sockfd);
+
+free_sock:
+	q->kni = NULL;
+	kni->vhost_queue = NULL;
+	kni->vq_status |= BE_FINISH;
+	sock_release(q->sock);
+	q->sock->ops = NULL;
+	q->sock = NULL;
+
+free_sk:
+	sk_free((struct sock*)q);
+
+	return err;
+}
+
+/* kni vhost sock sysfs */
+static ssize_t
+show_sock_fd(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	struct net_device *net_dev = container_of(dev, struct net_device, dev);
+	struct kni_dev *kni = netdev_priv(net_dev);
+	int sockfd = -1;
+	if (kni->vhost_queue != NULL)
+		sockfd = kni->vhost_queue->sockfd;
+	return snprintf(buf, 10, "%d\n", sockfd);
+}
+
+static ssize_t
+show_sock_en(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	struct net_device *net_dev = container_of(dev, struct net_device, dev);
+	struct kni_dev *kni = netdev_priv(net_dev);
+	return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
+}
+
+static ssize_t
+set_sock_en(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct net_device *net_dev = container_of(dev, struct net_device, dev);
+	struct kni_dev *kni = netdev_priv(net_dev);
+	unsigned long en;
+	int err = 0;
+
+	if (0 != kstrtoul(buf, 0, &en))
+		return -EINVAL;
+
+	if (en)
+		err = kni_vhost_backend_init(kni);
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL);
+static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
+static struct attribute *dev_attrs[] = {
+	&dev_attr_sock_fd.attr,
+	&dev_attr_sock_en.attr,
+        NULL,
+};
+
+static const struct attribute_group dev_attr_grp = {
+	.attrs = dev_attrs,
+};
+
+int
+kni_vhost_backend_release(struct kni_dev *kni)
+{
+	struct kni_vhost_queue *q = kni->vhost_queue;
+
+	if (q == NULL)
+		return 0;
+
+	/* dettach from kni */
+	q->kni = NULL;
+
+	KNI_DBG("release backend done\n");
+
+	return 0;
+}
+
+int
+kni_vhost_init(struct kni_dev *kni)
+{
+	struct net_device *dev = kni->net_dev;
+
+	if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp))
+		sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
+
+	kni->vq_status = BE_STOP;
+
+	KNI_DBG("kni_vhost_init done\n");
+
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile
new file mode 100644
index 00000000..9d22fb97
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile
@@ -0,0 +1,56 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_dom0_mm
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+# this lib needs main eal
+DEPDIRS-y += lib/librte_eal/linuxapp/eal
+
+#
+# all source are stored in SRCS-y
+#
+
+SRCS-y += dom0_mm_misc.c
+
+include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h
new file mode 100644
index 00000000..e6eb97f2
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/compat.h
@@ -0,0 +1,15 @@
+/*
+ * Minimal wrappers to allow compiling xen_dom0 on older kernels.
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	 RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
new file mode 100644
index 00000000..9d5ffb22
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
@@ -0,0 +1,107 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _DOM0_MM_DEV_H_
+#define _DOM0_MM_DEV_H_
+
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <exec-env/rte_dom0_common.h>
+
+#define NUM_MEM_CTX     256  /**< Maximum number of memory context*/
+#define MAX_EXCHANGE_FAIL_TIME 5  /**< Maximum times of allowing exchange fail .*/
+#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE)
+#define MAX_NUM_ORDER     (DOM0_CONTIG_NUM_ORDER + 1)
+#define SIZE_PER_BLOCK    2       /**< Size of memory block (2MB).*/
+
+/**
+ * A structure describing the private information for a dom0 device.
+ */
+struct dom0_mm_dev {
+	struct miscdevice miscdev;
+	uint8_t fail_times;
+	uint32_t used_memsize;
+	uint32_t num_mem_ctx;
+	uint32_t config_memsize;
+	uint32_t num_bigblock;
+	struct  dom0_mm_data *mm_data[NUM_MEM_CTX];
+	struct mutex data_lock;
+};
+
+struct dom0_mm_data{
+	uint32_t refcnt;
+	uint32_t num_memseg; /**< Number of memory segment. */
+	uint32_t mem_size;   /**< Size of requesting memory. */
+
+	char name[DOM0_NAME_MAX];
+
+	/** Store global memory block IDs used by an instance */
+	uint32_t block_num[DOM0_NUM_MEMBLOCK];
+
+	/** Store memory block information.*/
+	struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
+
+	/** Store memory segment information.*/
+	struct memseg_info  seg_info[DOM0_NUM_MEMSEG];
+};
+
+#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
+#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
+#endif
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
new file mode 100644
index 00000000..79630bad
--- /dev/null
+++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
@@ -0,0 +1,780 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/version.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+#include <xen/interface/memory.h>
+
+#include <exec-env/rte_dom0_common.h>
+
+#include "compat.h"
+#include "dom0_mm_dev.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
+
+static struct dom0_mm_dev dom0_dev;
+static struct kobject *dom0_kobj = NULL;
+
+static struct memblock_info *rsv_mm_info;
+
+/* Default configuration for reserved memory size(2048 MB). */
+static uint32_t rsv_memsize = 2048;
+
+static int dom0_open(struct inode *inode, struct file *file);
+static int dom0_release(struct inode *inode, struct file *file);
+static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
+		unsigned long ioctl_param);
+static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
+static int dom0_memory_free(uint32_t size);
+static int dom0_memory_release(struct dom0_mm_data *mm_data);
+
+static const struct file_operations data_fops = {
+	.owner = THIS_MODULE,
+	.open = dom0_open,
+	.release = dom0_release,
+	.mmap = dom0_mmap,
+	.unlocked_ioctl = (void *)dom0_ioctl,
+};
+
+static ssize_t
+show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize);
+}
+
+static ssize_t
+show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
+}
+
+static ssize_t
+store_memsize(struct device *dev, struct device_attribute *attr,
+            const char *buf, size_t count)
+{
+	int err = 0;
+	unsigned long mem_size;
+
+	if (0 != kstrtoul(buf, 0, &mem_size))
+		return  -EINVAL;
+
+	mutex_lock(&dom0_dev.data_lock);
+	if (0 == mem_size) {
+		err = -EINVAL;
+		goto fail;
+	} else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) {
+		XEN_ERR("configure memory size fail\n");
+		err = -EINVAL;
+		goto fail;
+	} else
+		dom0_dev.config_memsize = mem_size;
+
+fail:
+	mutex_unlock(&dom0_dev.data_lock);
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
+static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
+
+static struct attribute *dev_attrs[] = {
+	&dev_attr_memsize.attr,
+	&dev_attr_memsize_rsvd.attr,
+	NULL,
+};
+
+/* the memory size unit is MB */
+static const struct attribute_group dev_attr_grp = {
+	.name = "memsize-mB",
+	.attrs = dev_attrs,
+};
+
+
+static void
+sort_viraddr(struct memblock_info *mb, int cnt)
+{
+	int i,j;
+	uint64_t tmp_pfn;
+	uint64_t tmp_viraddr;
+
+	/*sort virtual address and pfn */
+	for(i = 0; i < cnt; i ++) {
+		for(j = cnt - 1; j > i; j--) {
+			if(mb[j].pfn < mb[j - 1].pfn) {
+				tmp_pfn = mb[j - 1].pfn;
+				mb[j - 1].pfn = mb[j].pfn;
+				mb[j].pfn = tmp_pfn;
+
+				tmp_viraddr = mb[j - 1].vir_addr;
+				mb[j - 1].vir_addr = mb[j].vir_addr;
+				mb[j].vir_addr = tmp_viraddr;
+			}
+		}
+	}
+}
+
+static int
+dom0_find_memdata(const char * mem_name)
+{
+	unsigned i;
+	int idx = -1;
+	for(i = 0; i< NUM_MEM_CTX; i++) {
+		if(dom0_dev.mm_data[i] == NULL)
+			continue;
+		if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
+			sizeof(char) * DOM0_NAME_MAX)) {
+			idx = i;
+			break;
+		}
+	}
+
+	return idx;
+}
+
+static int
+dom0_find_mempos(void)
+{
+	unsigned i;
+	int idx = -1;
+
+	for(i = 0; i< NUM_MEM_CTX; i++) {
+		if(dom0_dev.mm_data[i] == NULL){
+			idx = i;
+			break;
+		}
+	}
+
+	return idx;
+}
+
+static int
+dom0_memory_release(struct dom0_mm_data *mm_data)
+{
+	int idx;
+	uint32_t  num_block, block_id;
+
+	/* each memory block is 2M */
+	num_block = mm_data->mem_size / SIZE_PER_BLOCK;
+	if (num_block == 0)
+		return -EINVAL;
+
+	/* reset global memory data */
+	idx = dom0_find_memdata(mm_data->name);
+	if (idx >= 0) {
+		dom0_dev.used_memsize -= mm_data->mem_size;
+		dom0_dev.mm_data[idx] = NULL;
+		dom0_dev.num_mem_ctx--;
+	}
+
+	/* reset these memory blocks status as free */
+	for (idx = 0; idx < num_block; idx++) {
+		block_id = mm_data->block_num[idx];
+		rsv_mm_info[block_id].used = 0;
+	}
+
+	memset(mm_data, 0, sizeof(struct dom0_mm_data));
+	vfree(mm_data);
+	return 0;
+}
+
+static int
+dom0_memory_free(uint32_t rsv_size)
+{
+	uint64_t vstart, vaddr;
+	uint32_t i, num_block, size;
+
+	if (!xen_pv_domain())
+		return -1;
+
+	/* each memory block is 2M */
+	num_block = rsv_size / SIZE_PER_BLOCK;
+	if (num_block == 0)
+		return -EINVAL;
+
+	/* free all memory blocks of size of 4M and destroy contiguous region */
+	for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) {
+		vstart = rsv_mm_info[i].vir_addr;
+		if (vstart) {
+		#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
+			if (rsv_mm_info[i].exchange_flag)
+				xen_destroy_contiguous_region(vstart,
+						DOM0_CONTIG_NUM_ORDER);
+			if (rsv_mm_info[i + 1].exchange_flag)
+				xen_destroy_contiguous_region(vstart +
+						DOM0_MEMBLOCK_SIZE,
+						DOM0_CONTIG_NUM_ORDER);
+		#else
+			if (rsv_mm_info[i].exchange_flag)
+				xen_destroy_contiguous_region(rsv_mm_info[i].pfn
+					* PAGE_SIZE,
+					DOM0_CONTIG_NUM_ORDER);
+			if (rsv_mm_info[i + 1].exchange_flag)
+				xen_destroy_contiguous_region(rsv_mm_info[i].pfn
+					* PAGE_SIZE + DOM0_MEMBLOCK_SIZE,
+					DOM0_CONTIG_NUM_ORDER);
+		#endif
+
+			size = DOM0_MEMBLOCK_SIZE * 2;
+			vaddr = vstart;
+			while (size > 0) {
+				ClearPageReserved(virt_to_page(vaddr));
+				vaddr += PAGE_SIZE;
+				size -= PAGE_SIZE;
+			}
+			free_pages(vstart, MAX_NUM_ORDER);
+		}
+	}
+
+	/* free all memory blocks size of 2M and destroy contiguous region */
+	for (; i < num_block; i++) {
+		vstart = rsv_mm_info[i].vir_addr;
+		if (vstart) {
+			if (rsv_mm_info[i].exchange_flag)
+				xen_destroy_contiguous_region(vstart,
+					DOM0_CONTIG_NUM_ORDER);
+
+			size = DOM0_MEMBLOCK_SIZE;
+			vaddr = vstart;
+			while (size > 0) {
+				ClearPageReserved(virt_to_page(vaddr));
+				vaddr += PAGE_SIZE;
+				size -= PAGE_SIZE;
+			}
+			free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
+		}
+	}
+
+	memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
+	vfree(rsv_mm_info);
+	rsv_mm_info = NULL;
+
+	return 0;
+}
+
+static void
+find_free_memory(uint32_t count, struct dom0_mm_data *mm_data)
+{
+	uint32_t i = 0;
+	uint32_t j = 0;
+
+	while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) {
+		if (rsv_mm_info[j].used == 0) {
+			mm_data->block_info[i].pfn = rsv_mm_info[j].pfn;
+			mm_data->block_info[i].vir_addr =
+				rsv_mm_info[j].vir_addr;
+			mm_data->block_info[i].mfn = rsv_mm_info[j].mfn;
+			mm_data->block_info[i].exchange_flag =
+				rsv_mm_info[j].exchange_flag;
+			mm_data->block_num[i] = j;
+			rsv_mm_info[j].used = 1;
+			i++;
+		}
+		j++;
+	}
+}
+
+/**
+ * Find all memory segments in which physical addresses are contiguous.
+ */
+static void
+find_memseg(int count, struct dom0_mm_data * mm_data)
+{
+	int i = 0;
+	int j, k, idx = 0;
+	uint64_t zone_len, pfn, num_block;
+
+	while(i < count) {
+		if (mm_data->block_info[i].exchange_flag == 0) {
+			i++;
+			continue;
+		}
+		k = 0;
+		pfn = mm_data->block_info[i].pfn;
+		mm_data->seg_info[idx].pfn = pfn;
+		mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
+
+		for (j = i + 1; j < count; j++) {
+
+			/* ignore exchange fail memory block */
+			if (mm_data->block_info[j].exchange_flag == 0)
+				break;
+
+			if (mm_data->block_info[j].pfn !=
+				(mm_data->block_info[j - 1].pfn +
+					 DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
+			    break;
+			++k;
+			mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
+		}
+
+		num_block = j - i;
+		zone_len = num_block * DOM0_MEMBLOCK_SIZE;
+		mm_data->seg_info[idx].size = zone_len;
+
+		XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
+		i = i+ num_block;
+		idx++;
+		if (idx == DOM0_NUM_MEMSEG)
+			break;
+	}
+	mm_data->num_memseg = idx;
+}
+
+static int
+dom0_memory_reserve(uint32_t rsv_size)
+{
+	uint64_t pfn, vstart, vaddr;
+	uint32_t i, num_block, size, allocated_size = 0;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+	dma_addr_t dma_handle;
+#endif
+
+	/* 2M as memory block */
+	num_block = rsv_size / SIZE_PER_BLOCK;
+
+	rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block);
+	if (!rsv_mm_info) {
+		XEN_ERR("Unable to allocate device memory information\n");
+		return -ENOMEM;
+	}
+	memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
+
+	/* try alloc size of 4M once */
+	for (i = 0; i < num_block; i += 2) {
+		vstart = (unsigned long)
+			__get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER);
+		if (vstart == 0)
+			break;
+
+		dom0_dev.num_bigblock = i / 2 + 1;
+		allocated_size =  SIZE_PER_BLOCK * (i + 2);
+
+		/* size of 4M */
+		size = DOM0_MEMBLOCK_SIZE * 2;
+
+		vaddr = vstart;
+		while (size > 0) {
+			SetPageReserved(virt_to_page(vaddr));
+			vaddr += PAGE_SIZE;
+			size -= PAGE_SIZE;
+		}
+
+		pfn = virt_to_pfn(vstart);
+		rsv_mm_info[i].pfn = pfn;
+		rsv_mm_info[i].vir_addr = vstart;
+		rsv_mm_info[i + 1].pfn =
+				pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE;
+		rsv_mm_info[i + 1].vir_addr =
+				vstart + DOM0_MEMBLOCK_SIZE;
+	}
+
+	/*if it failed to alloc 4M, and continue to alloc 2M once */
+	for (; i < num_block; i++) {
+		vstart = (unsigned long)
+			__get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
+		if (vstart == 0) {
+			XEN_ERR("allocate memory fail.\n");
+			dom0_memory_free(allocated_size);
+			return -ENOMEM;
+		}
+
+		allocated_size += SIZE_PER_BLOCK;
+
+		size = DOM0_MEMBLOCK_SIZE;
+		vaddr = vstart;
+		while (size > 0) {
+			SetPageReserved(virt_to_page(vaddr));
+			vaddr += PAGE_SIZE;
+			size -= PAGE_SIZE;
+		}
+		pfn = virt_to_pfn(vstart);
+		rsv_mm_info[i].pfn = pfn;
+		rsv_mm_info[i].vir_addr = vstart;
+	}
+
+	sort_viraddr(rsv_mm_info, num_block);
+
+	for (i = 0; i< num_block; i++) {
+
+		/*
+		 * This API is used to exchage MFN for getting a block of
+		 * contiguous physical addresses, its maximum size is 2M.
+		 */
+	#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
+		if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr,
+				DOM0_CONTIG_NUM_ORDER, 0) == 0) {
+	#else
+		if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE,
+				DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) {
+	#endif
+			rsv_mm_info[i].exchange_flag = 1;
+			rsv_mm_info[i].mfn =
+				pfn_to_mfn(rsv_mm_info[i].pfn);
+			rsv_mm_info[i].used = 0;
+		} else {
+			XEN_ERR("exchange memeory fail\n");
+			rsv_mm_info[i].exchange_flag = 0;
+			dom0_dev.fail_times++;
+			if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) {
+				dom0_memory_free(rsv_size);
+				return  -EFAULT;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int
+dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
+{
+	uint32_t num_block;
+	int idx;
+
+	/* check if there is a free name buffer */
+	memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
+	mm_data->name[DOM0_NAME_MAX - 1] = '\0';
+	idx = dom0_find_mempos();
+	if (idx < 0)
+		return -1;
+
+	num_block = meminfo->size / SIZE_PER_BLOCK;
+	/* find free memory and new memory segments*/
+	find_free_memory(num_block, mm_data);
+	find_memseg(num_block, mm_data);
+
+	/* update private memory data */
+	mm_data->refcnt++;
+	mm_data->mem_size = meminfo->size;
+
+	/* update global memory data */
+	dom0_dev.mm_data[idx] = mm_data;
+	dom0_dev.num_mem_ctx++;
+	dom0_dev.used_memsize += mm_data->mem_size;
+
+	return 0;
+}
+
+static int
+dom0_check_memory (struct memory_info *meminfo)
+{
+	int idx;
+	uint64_t mem_size;
+
+	/* round memory size to the next even number. */
+	if (meminfo->size % 2)
+		++meminfo->size;
+
+	mem_size = meminfo->size;
+	if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
+		XEN_ERR("Memory data space is full in Dom0 driver\n");
+		return -1;
+	}
+	idx = dom0_find_memdata(meminfo->name);
+	if (idx >= 0) {
+		XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
+			meminfo->name);
+		return -1;
+	}
+	if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) {
+		XEN_ERR("Total size can't be larger than reserved size.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __init
+dom0_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (rsv_memsize > DOM0_CONFIG_MEMSIZE) {
+		XEN_ERR("The reserved memory size cannot be greater than %d\n",
+			DOM0_CONFIG_MEMSIZE);
+		return -EINVAL;
+	}
+
+	/* Setup the misc device */
+	dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
+	dom0_dev.miscdev.name = "dom0_mm";
+	dom0_dev.miscdev.fops = &data_fops;
+
+	/* register misc char device */
+	if (misc_register(&dom0_dev.miscdev) != 0) {
+		XEN_ERR("Misc device registration failed\n");
+		return -EPERM;
+	}
+
+	mutex_init(&dom0_dev.data_lock);
+	dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
+
+	if (!dom0_kobj) {
+		XEN_ERR("dom0-mm object creation failed\n");
+		misc_deregister(&dom0_dev.miscdev);
+		return -ENOMEM;
+	}
+
+	if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
+		kobject_put(dom0_kobj);
+		misc_deregister(&dom0_dev.miscdev);
+		return -EPERM;
+	}
+
+	if (dom0_memory_reserve(rsv_memsize) < 0) {
+		sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+		kobject_put(dom0_kobj);
+		misc_deregister(&dom0_dev.miscdev);
+		return -ENOMEM;
+	}
+
+	XEN_PRINT("####### DPDK Xen Dom0 module loaded  #######\n");
+
+	return 0;
+}
+
+static void __exit
+dom0_exit(void)
+{
+	if (rsv_mm_info != NULL)
+		dom0_memory_free(rsv_memsize);
+
+	sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+	kobject_put(dom0_kobj);
+	misc_deregister(&dom0_dev.miscdev);
+
+	XEN_PRINT("####### DPDK Xen Dom0 module unloaded  #######\n");
+}
+
+static int
+dom0_open(struct inode *inode, struct file *file)
+{
+	file->private_data = NULL;
+
+	XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
+	return 0;
+}
+
+static int
+dom0_release(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+	struct dom0_mm_data *mm_data = file->private_data;
+
+	if (mm_data == NULL)
+		return ret;
+
+	mutex_lock(&dom0_dev.data_lock);
+	if (--mm_data->refcnt == 0)
+		ret = dom0_memory_release(mm_data);
+	mutex_unlock(&dom0_dev.data_lock);
+
+	file->private_data = NULL;
+	XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
+	return ret;
+}
+
+static int
+dom0_mmap(struct file *file, struct vm_area_struct *vm)
+{
+	int status = 0;
+	uint32_t idx = vm->vm_pgoff;
+	uint64_t pfn, size = vm->vm_end - vm->vm_start;
+	struct dom0_mm_data *mm_data = file->private_data;
+
+	if(mm_data == NULL)
+		return -EINVAL;
+
+	mutex_lock(&dom0_dev.data_lock);
+	if (idx >= mm_data->num_memseg) {
+		mutex_unlock(&dom0_dev.data_lock);
+		return -EINVAL;
+	}
+
+	if (size > mm_data->seg_info[idx].size){
+		mutex_unlock(&dom0_dev.data_lock);
+		return -EINVAL;
+	}
+
+	XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
+
+	pfn = mm_data->seg_info[idx].pfn;
+	mutex_unlock(&dom0_dev.data_lock);
+
+	status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
+
+	return status;
+}
+static int
+dom0_ioctl(struct file *file,
+	unsigned int ioctl_num,
+	unsigned long ioctl_param)
+{
+	int idx, ret;
+	char name[DOM0_NAME_MAX] = {0};
+	struct memory_info meminfo;
+	struct dom0_mm_data *mm_data = file->private_data;
+
+	XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
+
+	/**
+	 * Switch according to the ioctl called
+	 */
+	switch _IOC_NR(ioctl_num) {
+	case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
+		ret = copy_from_user(&meminfo, (void *)ioctl_param,
+			sizeof(struct memory_info));
+		if (ret)
+			return  -EFAULT;
+
+		if (mm_data != NULL) {
+			XEN_ERR("Cannot create memory segment for the same"
+				" file descriptor\n");
+			return -EINVAL;
+		}
+
+		/* Allocate private data */
+		mm_data = vmalloc(sizeof(struct dom0_mm_data));
+		if (!mm_data) {
+			XEN_ERR("Unable to allocate device private data\n");
+			return -ENOMEM;
+		}
+		memset(mm_data, 0, sizeof(struct dom0_mm_data));
+
+		mutex_lock(&dom0_dev.data_lock);
+		/* check if we can allocate memory*/
+		if (dom0_check_memory(&meminfo) < 0) {
+			mutex_unlock(&dom0_dev.data_lock);
+			vfree(mm_data);
+			return -EINVAL;
+		}
+
+		/* allocate memory and created memory segments*/
+		if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
+			XEN_ERR("create memory segment fail.\n");
+			mutex_unlock(&dom0_dev.data_lock);
+			return -EIO;
+		}
+
+		file->private_data = mm_data;
+		mutex_unlock(&dom0_dev.data_lock);
+		break;
+
+	/* support multiple process in term of memory mapping*/
+	case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
+		ret = copy_from_user(name, (void *)ioctl_param,
+				sizeof(char) * DOM0_NAME_MAX);
+		if (ret)
+			return -EFAULT;
+
+		mutex_lock(&dom0_dev.data_lock);
+		idx = dom0_find_memdata(name);
+		if (idx < 0) {
+			mutex_unlock(&dom0_dev.data_lock);
+			return -EINVAL;
+		}
+
+		mm_data = dom0_dev.mm_data[idx];
+		mm_data->refcnt++;
+		file->private_data = mm_data;
+		mutex_unlock(&dom0_dev.data_lock);
+		break;
+
+	case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
+		ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
+				sizeof(int));
+		if (ret)
+			return -EFAULT;
+		break;
+
+	case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
+		ret = copy_to_user((void *)ioctl_param,
+				&mm_data->seg_info[0],
+				sizeof(struct memseg_info) *
+				mm_data->num_memseg);
+		if (ret)
+			return -EFAULT;
+		break;
+	default:
+		XEN_PRINT("IOCTL default \n");
+		break;
+	}
+
+	return 0;
+}
+
+module_init(dom0_init);
+module_exit(dom0_exit);
+
+module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n");
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
new file mode 100644
index 00000000..e8102846
--- /dev/null
+++ b/lib/librte_ether/Makefile
@@ -0,0 +1,59 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = libethdev.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_ether_version.map
+
+LIBABIVER := 3
+
+SRCS-y += rte_ethdev.c
+
+#
+# Export include files
+#
+SYMLINK-y-include += rte_ether.h
+SYMLINK-y-include += rte_ethdev.h
+SYMLINK-y-include += rte_eth_ctrl.h
+SYMLINK-y-include += rte_dev_info.h
+
+# this lib depends upon:
+DEPDIRS-y += lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ether/rte_dev_info.h
new file mode 100644
index 00000000..291bd4d7
--- /dev/null
+++ b/lib/librte_ether/rte_dev_info.h
@@ -0,0 +1,57 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_DEV_INFO_H_
+#define _RTE_DEV_INFO_H_
+
+/*
+ * Placeholder for accessing device registers
+ */
+struct rte_dev_reg_info {
+	void *data; /**< Buffer for return registers */
+	uint32_t offset; /**< Start register table location for access */
+	uint32_t length; /**< Number of registers to fetch */
+	uint32_t version; /**< Device version */
+};
+
+/*
+ * Placeholder for accessing device eeprom
+ */
+struct rte_dev_eeprom_info {
+	void *data; /**< Buffer for return eeprom */
+	uint32_t offset; /**< Start eeprom address for access*/
+	uint32_t length; /**< Length of eeprom region to access */
+	uint32_t magic; /**< Device-specific key, such as device-id */
+};
+
+#endif /* _RTE_DEV_INFO_H_ */
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
new file mode 100644
index 00000000..b8c7be90
--- /dev/null
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -0,0 +1,846 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETH_CTRL_H_
+#define _RTE_ETH_CTRL_H_
+
+/**
+ * @file
+ *
+ * Ethernet device features and related data structures used
+ * by control APIs should be defined in this file.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * A packet can be identified by hardware as different flow types. Different
+ * NIC hardwares may support different flow types.
+ * Basically, the NIC hardware identifies the flow type as deep protocol as
+ * possible, and exclusively. For example, if a packet is identified as
+ * 'RTE_ETH_FLOW_NONFRAG_IPV4_TCP', it will not be any of other flow types,
+ * though it is an actual IPV4 packet.
+ * Note that the flow types are used to define RSS offload types in
+ * rte_ethdev.h.
+ */
+#define RTE_ETH_FLOW_UNKNOWN             0
+#define RTE_ETH_FLOW_RAW                 1
+#define RTE_ETH_FLOW_IPV4                2
+#define RTE_ETH_FLOW_FRAG_IPV4           3
+#define RTE_ETH_FLOW_NONFRAG_IPV4_TCP    4
+#define RTE_ETH_FLOW_NONFRAG_IPV4_UDP    5
+#define RTE_ETH_FLOW_NONFRAG_IPV4_SCTP   6
+#define RTE_ETH_FLOW_NONFRAG_IPV4_OTHER  7
+#define RTE_ETH_FLOW_IPV6                8
+#define RTE_ETH_FLOW_FRAG_IPV6           9
+#define RTE_ETH_FLOW_NONFRAG_IPV6_TCP   10
+#define RTE_ETH_FLOW_NONFRAG_IPV6_UDP   11
+#define RTE_ETH_FLOW_NONFRAG_IPV6_SCTP  12
+#define RTE_ETH_FLOW_NONFRAG_IPV6_OTHER 13
+#define RTE_ETH_FLOW_L2_PAYLOAD         14
+#define RTE_ETH_FLOW_IPV6_EX            15
+#define RTE_ETH_FLOW_IPV6_TCP_EX        16
+#define RTE_ETH_FLOW_IPV6_UDP_EX        17
+#define RTE_ETH_FLOW_MAX                18
+
+/**
+ * Feature filter types
+ */
+enum rte_filter_type {
+	RTE_ETH_FILTER_NONE = 0,
+	RTE_ETH_FILTER_MACVLAN,
+	RTE_ETH_FILTER_ETHERTYPE,
+	RTE_ETH_FILTER_FLEXIBLE,
+	RTE_ETH_FILTER_SYN,
+	RTE_ETH_FILTER_NTUPLE,
+	RTE_ETH_FILTER_TUNNEL,
+	RTE_ETH_FILTER_FDIR,
+	RTE_ETH_FILTER_HASH,
+	RTE_ETH_FILTER_L2_TUNNEL,
+	RTE_ETH_FILTER_MAX
+};
+
+/**
+ * Generic operations on filters
+ */
+enum rte_filter_op {
+	/** used to check whether the type filter is supported */
+	RTE_ETH_FILTER_NOP = 0,
+	RTE_ETH_FILTER_ADD,      /**< add filter entry */
+	RTE_ETH_FILTER_UPDATE,   /**< update filter entry */
+	RTE_ETH_FILTER_DELETE,   /**< delete filter entry */
+	RTE_ETH_FILTER_FLUSH,    /**< flush all entries */
+	RTE_ETH_FILTER_GET,      /**< get filter entry */
+	RTE_ETH_FILTER_SET,      /**< configurations */
+	RTE_ETH_FILTER_INFO,     /**< retrieve information */
+	RTE_ETH_FILTER_STATS,    /**< retrieve statistics */
+	RTE_ETH_FILTER_OP_MAX
+};
+
+/**
+ * MAC filter type
+ */
+enum rte_mac_filter_type {
+	RTE_MAC_PERFECT_MATCH = 1, /**< exact match of MAC addr. */
+	RTE_MACVLAN_PERFECT_MATCH, /**< exact match of MAC addr and VLAN ID. */
+	RTE_MAC_HASH_MATCH, /**< hash match of MAC addr. */
+	/** hash match of MAC addr and exact match of VLAN ID. */
+	RTE_MACVLAN_HASH_MATCH,
+};
+
+/**
+ * MAC filter info
+ */
+struct rte_eth_mac_filter {
+	uint8_t is_vf; /**< 1 for VF, 0 for port dev */
+	uint16_t dst_id; /**< VF ID, available when is_vf is 1*/
+	enum rte_mac_filter_type filter_type; /**< MAC filter type */
+	struct ether_addr mac_addr;
+};
+
+/**
+ * Define all structures for Ethertype Filter type.
+ */
+
+#define RTE_ETHTYPE_FLAGS_MAC    0x0001 /**< If set, compare mac */
+#define RTE_ETHTYPE_FLAGS_DROP   0x0002 /**< If set, drop packet when match */
+
+/**
+ * A structure used to define the ethertype filter entry
+ * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD,
+ * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations.
+ */
+struct rte_eth_ethertype_filter {
+	struct ether_addr mac_addr;   /**< Mac address to match. */
+	uint16_t ether_type;          /**< Ether type to match */
+	uint16_t flags;               /**< Flags from RTE_ETHTYPE_FLAGS_* */
+	uint16_t queue;               /**< Queue assigned to when match*/
+};
+
+#define RTE_FLEX_FILTER_MAXLEN	128	/**< bytes to use in flex filter. */
+#define RTE_FLEX_FILTER_MASK_SIZE	\
+	(RTE_ALIGN(RTE_FLEX_FILTER_MAXLEN, CHAR_BIT) / CHAR_BIT)
+					/**< mask bytes in flex filter. */
+
+/**
+ *  A structure used to define the flex filter entry
+ *  to support RTE_ETH_FILTER_FLEXIBLE with RTE_ETH_FILTER_ADD,
+ *  RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations.
+ */
+struct rte_eth_flex_filter {
+	uint16_t len;
+	uint8_t bytes[RTE_FLEX_FILTER_MAXLEN];  /**< flex bytes in big endian.*/
+	uint8_t mask[RTE_FLEX_FILTER_MASK_SIZE];    /**< if mask bit is 1b, do
+					not compare corresponding byte. */
+	uint8_t priority;
+	uint16_t queue;       /**< Queue assigned to when match. */
+};
+
+/**
+ * A structure used to define the TCP syn filter entry
+ * to support RTE_ETH_FILTER_SYN with RTE_ETH_FILTER_ADD,
+ * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations.
+ */
+struct rte_eth_syn_filter {
+	uint8_t hig_pri;     /**< 1 - higher priority than other filters,
+				  0 - lower priority. */
+	uint16_t queue;      /**< Queue assigned to when match */
+};
+
+/**
+ * Define all structures for ntuple Filter type.
+ */
+
+#define RTE_NTUPLE_FLAGS_DST_IP    0x0001 /**< If set, dst_ip is part of ntuple */
+#define RTE_NTUPLE_FLAGS_SRC_IP    0x0002 /**< If set, src_ip is part of ntuple */
+#define RTE_NTUPLE_FLAGS_DST_PORT  0x0004 /**< If set, dst_port is part of ntuple */
+#define RTE_NTUPLE_FLAGS_SRC_PORT  0x0008 /**< If set, src_port is part of ntuple */
+#define RTE_NTUPLE_FLAGS_PROTO     0x0010 /**< If set, protocol is part of ntuple */
+#define RTE_NTUPLE_FLAGS_TCP_FLAG  0x0020 /**< If set, tcp flag is involved */
+
+#define RTE_5TUPLE_FLAGS ( \
+		RTE_NTUPLE_FLAGS_DST_IP | \
+		RTE_NTUPLE_FLAGS_SRC_IP | \
+		RTE_NTUPLE_FLAGS_DST_PORT | \
+		RTE_NTUPLE_FLAGS_SRC_PORT | \
+		RTE_NTUPLE_FLAGS_PROTO)
+
+#define RTE_2TUPLE_FLAGS ( \
+		RTE_NTUPLE_FLAGS_DST_PORT | \
+		RTE_NTUPLE_FLAGS_PROTO)
+
+#define TCP_URG_FLAG 0x20
+#define TCP_ACK_FLAG 0x10
+#define TCP_PSH_FLAG 0x08
+#define TCP_RST_FLAG 0x04
+#define TCP_SYN_FLAG 0x02
+#define TCP_FIN_FLAG 0x01
+#define TCP_FLAG_ALL 0x3F
+
+/**
+ * A structure used to define the ntuple filter entry
+ * to support RTE_ETH_FILTER_NTUPLE with RTE_ETH_FILTER_ADD,
+ * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations.
+ */
+struct rte_eth_ntuple_filter {
+	uint16_t flags;          /**< Flags from RTE_NTUPLE_FLAGS_* */
+	uint32_t dst_ip;         /**< Destination IP address in big endian. */
+	uint32_t dst_ip_mask;    /**< Mask of destination IP address. */
+	uint32_t src_ip;         /**< Source IP address in big endian. */
+	uint32_t src_ip_mask;    /**< Mask of destination IP address. */
+	uint16_t dst_port;       /**< Destination port in big endian. */
+	uint16_t dst_port_mask;  /**< Mask of destination port. */
+	uint16_t src_port;       /**< Source Port in big endian. */
+	uint16_t src_port_mask;  /**< Mask of source port. */
+	uint8_t proto;           /**< L4 protocol. */
+	uint8_t proto_mask;      /**< Mask of L4 protocol. */
+	/** tcp_flags only meaningful when the proto is TCP.
+	    The packet matched above ntuple fields and contain
+	    any set bit in tcp_flags will hit this filter. */
+	uint8_t tcp_flags;
+	uint16_t priority;       /**< seven levels (001b-111b), 111b is highest,
+				      used when more than one filter matches. */
+	uint16_t queue;          /**< Queue assigned to when match*/
+};
+
+/**
+ * Tunneled type.
+ */
+enum rte_eth_tunnel_type {
+	RTE_TUNNEL_TYPE_NONE = 0,
+	RTE_TUNNEL_TYPE_VXLAN,
+	RTE_TUNNEL_TYPE_GENEVE,
+	RTE_TUNNEL_TYPE_TEREDO,
+	RTE_TUNNEL_TYPE_NVGRE,
+	RTE_TUNNEL_TYPE_IP_IN_GRE,
+	RTE_L2_TUNNEL_TYPE_E_TAG,
+	RTE_TUNNEL_TYPE_MAX,
+};
+
+/**
+ * filter type of tunneling packet
+ */
+#define ETH_TUNNEL_FILTER_OMAC  0x01 /**< filter by outer MAC addr */
+#define ETH_TUNNEL_FILTER_OIP   0x02 /**< filter by outer IP Addr */
+#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */
+#define ETH_TUNNEL_FILTER_IMAC  0x08 /**< filter by inner MAC addr */
+#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */
+#define ETH_TUNNEL_FILTER_IIP   0x20 /**< filter by inner IP addr */
+
+#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \
+					ETH_TUNNEL_FILTER_IVLAN)
+#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \
+					ETH_TUNNEL_FILTER_IVLAN | \
+					ETH_TUNNEL_FILTER_TENID)
+#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \
+					ETH_TUNNEL_FILTER_TENID)
+#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \
+					ETH_TUNNEL_FILTER_TENID | \
+					ETH_TUNNEL_FILTER_IMAC)
+
+/**
+ *  Select IPv4 or IPv6 for tunnel filters.
+ */
+enum rte_tunnel_iptype {
+	RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */
+	RTE_TUNNEL_IPTYPE_IPV6,     /**< IPv6. */
+};
+
+/**
+ * Tunneling Packet filter configuration.
+ */
+struct rte_eth_tunnel_filter_conf {
+	struct ether_addr outer_mac;    /**< Outer MAC address to match. */
+	struct ether_addr inner_mac;    /**< Inner MAC address to match. */
+	uint16_t inner_vlan;            /**< Inner VLAN to match. */
+	enum rte_tunnel_iptype ip_type; /**< IP address type. */
+	/** Outer destination IP address to match if ETH_TUNNEL_FILTER_OIP
+	    is set in filter_type, or inner destination IP address to match
+	    if ETH_TUNNEL_FILTER_IIP is set in filter_type . */
+	union {
+		uint32_t ipv4_addr;     /**< IPv4 address in big endian. */
+		uint32_t ipv6_addr[4];  /**< IPv6 address in big endian. */
+	} ip_addr;
+	/** Flags from ETH_TUNNEL_FILTER_XX - see above. */
+	uint16_t filter_type;
+	enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */
+	uint32_t tenant_id;     /**< Tenant ID to match. VNI, GRE key... */
+	uint16_t queue_id;      /**< Queue assigned to if match. */
+};
+
+/**
+ * Global eth device configuration type.
+ */
+enum rte_eth_global_cfg_type {
+	RTE_ETH_GLOBAL_CFG_TYPE_UNKNOWN = 0,
+	RTE_ETH_GLOBAL_CFG_TYPE_GRE_KEY_LEN,
+	RTE_ETH_GLOBAL_CFG_TYPE_MAX,
+};
+
+/**
+ * Global eth device configuration.
+ */
+struct rte_eth_global_cfg {
+	enum rte_eth_global_cfg_type cfg_type; /**< Global config type. */
+	union {
+		uint8_t gre_key_len; /**< Valid GRE key length in byte. */
+		uint64_t reserved; /**< Reserve space for future use. */
+	} cfg;
+};
+
+#define RTE_ETH_FDIR_MAX_FLEXLEN 16  /**< Max length of flexbytes. */
+#define RTE_ETH_INSET_SIZE_MAX   128 /**< Max length of input set. */
+
+/**
+ * Input set fields for Flow Director and Hash filters
+ */
+enum rte_eth_input_set_field {
+	RTE_ETH_INPUT_SET_UNKNOWN = 0,
+
+	/* L2 */
+	RTE_ETH_INPUT_SET_L2_SRC_MAC = 1,
+	RTE_ETH_INPUT_SET_L2_DST_MAC,
+	RTE_ETH_INPUT_SET_L2_OUTER_VLAN,
+	RTE_ETH_INPUT_SET_L2_INNER_VLAN,
+	RTE_ETH_INPUT_SET_L2_ETHERTYPE,
+
+	/* L3 */
+	RTE_ETH_INPUT_SET_L3_SRC_IP4 = 129,
+	RTE_ETH_INPUT_SET_L3_DST_IP4,
+	RTE_ETH_INPUT_SET_L3_SRC_IP6,
+	RTE_ETH_INPUT_SET_L3_DST_IP6,
+	RTE_ETH_INPUT_SET_L3_IP4_TOS,
+	RTE_ETH_INPUT_SET_L3_IP4_PROTO,
+	RTE_ETH_INPUT_SET_L3_IP6_TC,
+	RTE_ETH_INPUT_SET_L3_IP6_NEXT_HEADER,
+	RTE_ETH_INPUT_SET_L3_IP4_TTL,
+	RTE_ETH_INPUT_SET_L3_IP6_HOP_LIMITS,
+
+	/* L4 */
+	RTE_ETH_INPUT_SET_L4_UDP_SRC_PORT = 257,
+	RTE_ETH_INPUT_SET_L4_UDP_DST_PORT,
+	RTE_ETH_INPUT_SET_L4_TCP_SRC_PORT,
+	RTE_ETH_INPUT_SET_L4_TCP_DST_PORT,
+	RTE_ETH_INPUT_SET_L4_SCTP_SRC_PORT,
+	RTE_ETH_INPUT_SET_L4_SCTP_DST_PORT,
+	RTE_ETH_INPUT_SET_L4_SCTP_VERIFICATION_TAG,
+
+	/* Tunnel */
+	RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_DST_MAC = 385,
+	RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_SRC_MAC,
+	RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_VLAN,
+	RTE_ETH_INPUT_SET_TUNNEL_L4_UDP_KEY,
+	RTE_ETH_INPUT_SET_TUNNEL_GRE_KEY,
+
+	/* Flexible Payload */
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_1ST_WORD = 641,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_2ND_WORD,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_3RD_WORD,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_4TH_WORD,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_5TH_WORD,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_6TH_WORD,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_7TH_WORD,
+	RTE_ETH_INPUT_SET_FLEX_PAYLOAD_8TH_WORD,
+
+	RTE_ETH_INPUT_SET_DEFAULT = 65533,
+	RTE_ETH_INPUT_SET_NONE = 65534,
+	RTE_ETH_INPUT_SET_MAX = 65535,
+};
+
+/**
+ * Filters input set operations
+ */
+enum rte_filter_input_set_op {
+	RTE_ETH_INPUT_SET_OP_UNKNOWN,
+	RTE_ETH_INPUT_SET_SELECT, /**< select input set */
+	RTE_ETH_INPUT_SET_ADD,    /**< add input set entry */
+	RTE_ETH_INPUT_SET_OP_MAX
+};
+
+
+/**
+ * A structure used to define the input set configuration for
+ * flow director and hash filters
+ */
+struct rte_eth_input_set_conf {
+	uint16_t flow_type;
+	uint16_t inset_size;
+	enum rte_eth_input_set_field field[RTE_ETH_INSET_SIZE_MAX];
+	enum rte_filter_input_set_op op;
+};
+
+/**
+ * A structure used to define the input for L2 flow
+ */
+struct rte_eth_l2_flow {
+	uint16_t ether_type;          /**< Ether type in big endian */
+};
+
+/**
+ * A structure used to define the input for IPV4 flow
+ */
+struct rte_eth_ipv4_flow {
+	uint32_t src_ip;      /**< IPv4 source address in big endian. */
+	uint32_t dst_ip;      /**< IPv4 destination address in big endian. */
+	uint8_t  tos;         /**< Type of service to match. */
+	uint8_t  ttl;         /**< Time to live to match. */
+	uint8_t  proto;       /**< Protocol, next header in big endian. */
+};
+
+/**
+ * A structure used to define the input for IPV4 UDP flow
+ */
+struct rte_eth_udpv4_flow {
+	struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */
+	uint16_t src_port;           /**< UDP source port in big endian. */
+	uint16_t dst_port;           /**< UDP destination port in big endian. */
+};
+
+/**
+ * A structure used to define the input for IPV4 TCP flow
+ */
+struct rte_eth_tcpv4_flow {
+	struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */
+	uint16_t src_port;           /**< TCP source port in big endian. */
+	uint16_t dst_port;           /**< TCP destination port in big endian. */
+};
+
+/**
+ * A structure used to define the input for IPV4 SCTP flow
+ */
+struct rte_eth_sctpv4_flow {
+	struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */
+	uint16_t src_port;           /**< SCTP source port in big endian. */
+	uint16_t dst_port;           /**< SCTP destination port in big endian. */
+	uint32_t verify_tag;         /**< Verify tag in big endian */
+};
+
+/**
+ * A structure used to define the input for IPV6 flow
+ */
+struct rte_eth_ipv6_flow {
+	uint32_t src_ip[4];      /**< IPv6 source address in big endian. */
+	uint32_t dst_ip[4];      /**< IPv6 destination address in big endian. */
+	uint8_t  tc;             /**< Traffic class to match. */
+	uint8_t  proto;          /**< Protocol, next header to match. */
+	uint8_t  hop_limits;     /**< Hop limits to match. */
+};
+
+/**
+ * A structure used to define the input for IPV6 UDP flow
+ */
+struct rte_eth_udpv6_flow {
+	struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */
+	uint16_t src_port;           /**< UDP source port in big endian. */
+	uint16_t dst_port;           /**< UDP destination port in big endian. */
+};
+
+/**
+ * A structure used to define the input for IPV6 TCP flow
+ */
+struct rte_eth_tcpv6_flow {
+	struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */
+	uint16_t src_port;           /**< TCP source port to in big endian. */
+	uint16_t dst_port;           /**< TCP destination port in big endian. */
+};
+
+/**
+ * A structure used to define the input for IPV6 SCTP flow
+ */
+struct rte_eth_sctpv6_flow {
+	struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */
+	uint16_t src_port;           /**< SCTP source port in big endian. */
+	uint16_t dst_port;           /**< SCTP destination port in big endian. */
+	uint32_t verify_tag;         /**< Verify tag in big endian. */
+};
+
+/**
+ * A structure used to define the input for MAC VLAN flow
+ */
+struct rte_eth_mac_vlan_flow {
+	struct ether_addr mac_addr;  /**< Mac address to match. */
+};
+
+/**
+ * Tunnel type for flow director.
+ */
+enum rte_eth_fdir_tunnel_type {
+	RTE_FDIR_TUNNEL_TYPE_UNKNOWN = 0,
+	RTE_FDIR_TUNNEL_TYPE_NVGRE,
+	RTE_FDIR_TUNNEL_TYPE_VXLAN,
+};
+
+/**
+ * A structure used to define the input for tunnel flow, now it's VxLAN or
+ * NVGRE
+ */
+struct rte_eth_tunnel_flow {
+	enum rte_eth_fdir_tunnel_type tunnel_type; /**< Tunnel type to match. */
+	/** Tunnel ID to match. TNI, VNI... in big endian. */
+	uint32_t tunnel_id;
+	struct ether_addr mac_addr;                /**< Mac address to match. */
+};
+
+/**
+ * An union contains the inputs for all types of flow
+ * Items in flows need to be in big endian
+ */
+union rte_eth_fdir_flow {
+	struct rte_eth_l2_flow     l2_flow;
+	struct rte_eth_udpv4_flow  udp4_flow;
+	struct rte_eth_tcpv4_flow  tcp4_flow;
+	struct rte_eth_sctpv4_flow sctp4_flow;
+	struct rte_eth_ipv4_flow   ip4_flow;
+	struct rte_eth_udpv6_flow  udp6_flow;
+	struct rte_eth_tcpv6_flow  tcp6_flow;
+	struct rte_eth_sctpv6_flow sctp6_flow;
+	struct rte_eth_ipv6_flow   ipv6_flow;
+	struct rte_eth_mac_vlan_flow mac_vlan_flow;
+	struct rte_eth_tunnel_flow   tunnel_flow;
+};
+
+/**
+ * A structure used to contain extend input of flow
+ */
+struct rte_eth_fdir_flow_ext {
+	uint16_t vlan_tci;
+	uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN];
+	/**< It is filled by the flexible payload to match. */
+	uint8_t is_vf;   /**< 1 for VF, 0 for port dev */
+	uint16_t dst_id; /**< VF ID, available when is_vf is 1*/
+};
+
+/**
+ * A structure used to define the input for a flow director filter entry
+ */
+struct rte_eth_fdir_input {
+	uint16_t flow_type;
+	union rte_eth_fdir_flow flow;
+	/**< Flow fields to match, dependent on flow_type */
+	struct rte_eth_fdir_flow_ext flow_ext;
+	/**< Additional fields to match */
+};
+
+/**
+ * Behavior will be taken if FDIR match
+ */
+enum rte_eth_fdir_behavior {
+	RTE_ETH_FDIR_ACCEPT = 0,
+	RTE_ETH_FDIR_REJECT,
+	RTE_ETH_FDIR_PASSTHRU,
+};
+
+/**
+ * Flow director report status
+ * It defines what will be reported if FDIR entry is matched.
+ */
+enum rte_eth_fdir_status {
+	RTE_ETH_FDIR_NO_REPORT_STATUS = 0, /**< Report nothing. */
+	RTE_ETH_FDIR_REPORT_ID,            /**< Only report FD ID. */
+	RTE_ETH_FDIR_REPORT_ID_FLEX_4,     /**< Report FD ID and 4 flex bytes. */
+	RTE_ETH_FDIR_REPORT_FLEX_8,        /**< Report 8 flex bytes. */
+};
+
+/**
+ * A structure used to define an action when match FDIR packet filter.
+ */
+struct rte_eth_fdir_action {
+	uint16_t rx_queue;        /**< Queue assigned to if FDIR match. */
+	enum rte_eth_fdir_behavior behavior;     /**< Behavior will be taken */
+	enum rte_eth_fdir_status report_status;  /**< Status report option */
+	uint8_t flex_off;
+	/**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or
+	     RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported
+	     flex bytes start from in flexible payload. */
+};
+
+/**
+ * A structure used to define the flow director filter entry by filter_ctrl API
+ * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and
+ * RTE_ETH_FILTER_DELETE operations.
+ */
+struct rte_eth_fdir_filter {
+	uint32_t soft_id;
+	/**< ID, an unique value is required when deal with FDIR entry */
+	struct rte_eth_fdir_input input;    /**< Input set */
+	struct rte_eth_fdir_action action;  /**< Action taken when match */
+};
+
+/**
+ *  A structure used to configure FDIR masks that are used by the device
+ *  to match the various fields of RX packet headers.
+ */
+struct rte_eth_fdir_masks {
+	uint16_t vlan_tci_mask;   /**< Bit mask for vlan_tci in big endian */
+	/** Bit mask for ipv4 flow in big endian. */
+	struct rte_eth_ipv4_flow   ipv4_mask;
+	/** Bit maks for ipv6 flow in big endian. */
+	struct rte_eth_ipv6_flow   ipv6_mask;
+	/** Bit mask for L4 source port in big endian. */
+	uint16_t src_port_mask;
+	/** Bit mask for L4 destination port in big endian. */
+	uint16_t dst_port_mask;
+	/** 6 bit mask for proper 6 bytes of Mac address, bit 0 matches the
+	    first byte on the wire */
+	uint8_t mac_addr_byte_mask;
+	/** Bit mask for tunnel ID in big endian. */
+	uint32_t tunnel_id_mask;
+	uint8_t tunnel_type_mask; /**< 1 - Match tunnel type,
+				       0 - Ignore tunnel type. */
+};
+
+/**
+ * Payload type
+ */
+enum rte_eth_payload_type {
+	RTE_ETH_PAYLOAD_UNKNOWN = 0,
+	RTE_ETH_RAW_PAYLOAD,
+	RTE_ETH_L2_PAYLOAD,
+	RTE_ETH_L3_PAYLOAD,
+	RTE_ETH_L4_PAYLOAD,
+	RTE_ETH_PAYLOAD_MAX = 8,
+};
+
+/**
+ * A structure used to select bytes extracted from the protocol layers to
+ * flexible payload for filter
+ */
+struct rte_eth_flex_payload_cfg {
+	enum rte_eth_payload_type type;  /**< Payload type */
+	uint16_t src_offset[RTE_ETH_FDIR_MAX_FLEXLEN];
+	/**< Offset in bytes from the beginning of packet's payload
+	     src_offset[i] indicates the flexbyte i's offset in original
+	     packet payload. This value should be less than
+	     flex_payload_limit in struct rte_eth_fdir_info.*/
+};
+
+/**
+ * A structure used to define FDIR masks for flexible payload
+ * for each flow type
+ */
+struct rte_eth_fdir_flex_mask {
+	uint16_t flow_type;
+	uint8_t mask[RTE_ETH_FDIR_MAX_FLEXLEN];
+	/**< Mask for the whole flexible payload */
+};
+
+/**
+ * A structure used to define all flexible payload related setting
+ * include flex payload and flex mask
+ */
+struct rte_eth_fdir_flex_conf {
+	uint16_t nb_payloads;  /**< The number of following payload cfg */
+	uint16_t nb_flexmasks; /**< The number of following mask */
+	struct rte_eth_flex_payload_cfg flex_set[RTE_ETH_PAYLOAD_MAX];
+	/**< Flex payload configuration for each payload type */
+	struct rte_eth_fdir_flex_mask flex_mask[RTE_ETH_FLOW_MAX];
+	/**< Flex mask configuration for each flow type */
+};
+
+/**
+ *  Flow Director setting modes: none, signature or perfect.
+ */
+enum rte_fdir_mode {
+	RTE_FDIR_MODE_NONE      = 0, /**< Disable FDIR support. */
+	RTE_FDIR_MODE_SIGNATURE,     /**< Enable FDIR signature filter mode. */
+	RTE_FDIR_MODE_PERFECT,       /**< Enable FDIR perfect filter mode. */
+	RTE_FDIR_MODE_PERFECT_MAC_VLAN, /**< Enable FDIR filter mode - MAC VLAN. */
+	RTE_FDIR_MODE_PERFECT_TUNNEL,   /**< Enable FDIR filter mode - tunnel. */
+};
+
+#define UINT32_BIT (CHAR_BIT * sizeof(uint32_t))
+#define RTE_FLOW_MASK_ARRAY_SIZE \
+	(RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT32_BIT)/UINT32_BIT)
+
+/**
+ * A structure used to get the information of flow director filter.
+ * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_INFO operation.
+ * It includes the mode, flexible payload configuration information,
+ * capabilities and supported flow types, flexible payload characters.
+ * It can be gotten to help taking specific configurations per device.
+ */
+struct rte_eth_fdir_info {
+	enum rte_fdir_mode mode; /**< Flow director mode */
+	struct rte_eth_fdir_masks mask;
+	/** Flex payload configuration information */
+	struct rte_eth_fdir_flex_conf flex_conf;
+	uint32_t guarant_spc; /**< Guaranteed spaces.*/
+	uint32_t best_spc; /**< Best effort spaces.*/
+	/** Bit mask for every supported flow type. */
+	uint32_t flow_types_mask[RTE_FLOW_MASK_ARRAY_SIZE];
+	uint32_t max_flexpayload; /**< Total flex payload in bytes. */
+	/** Flexible payload unit in bytes. Size and alignments of all flex
+	    payload segments should be multiplies of this value. */
+	uint32_t flex_payload_unit;
+	/** Max number of flexible payload continuous segments.
+	    Each segment should be a multiple of flex_payload_unit.*/
+	uint32_t max_flex_payload_segment_num;
+	/** Maximum src_offset in bytes allowed. It indicates that
+	    src_offset[i] in struct rte_eth_flex_payload_cfg should be less
+	    than this value. */
+	uint16_t flex_payload_limit;
+	/** Flex bitmask unit in bytes. Size of flex bitmasks should be a
+	    multiply of this value. */
+	uint32_t flex_bitmask_unit;
+	/** Max supported size of flex bitmasks in flex_bitmask_unit */
+	uint32_t max_flex_bitmask_num;
+};
+
+/**
+ * A structure used to define the statistics of flow director.
+ * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_STATS operation.
+ */
+struct rte_eth_fdir_stats {
+	uint32_t collision;    /**< Number of filters with collision. */
+	uint32_t free;         /**< Number of free filters. */
+	uint32_t maxhash;
+	/**< The lookup hash value of the added filter that updated the value
+	   of the MAXLEN field */
+	uint32_t maxlen;       /**< Longest linked list of filters. */
+	uint64_t add;          /**< Number of added filters. */
+	uint64_t remove;       /**< Number of removed filters. */
+	uint64_t f_add;        /**< Number of failed added filters. */
+	uint64_t f_remove;     /**< Number of failed removed filters. */
+	uint32_t guarant_cnt;  /**< Number of filters in guaranteed spaces. */
+	uint32_t best_cnt;     /**< Number of filters in best effort spaces. */
+};
+
+/**
+ * Flow Director filter information types.
+ */
+enum rte_eth_fdir_filter_info_type {
+	RTE_ETH_FDIR_FILTER_INFO_TYPE_UNKNOWN = 0,
+	/** Flow Director filter input set configuration */
+	RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT,
+	RTE_ETH_FDIR_FILTER_INFO_TYPE_MAX,
+};
+
+/**
+ * A structure used to set FDIR filter information, to support filter type
+ * of 'RTE_ETH_FILTER_FDIR' RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT operation.
+ */
+struct rte_eth_fdir_filter_info {
+	enum rte_eth_fdir_filter_info_type info_type; /**< Information type */
+	/** Details of fdir filter information */
+	union {
+		/** Flow Director input set configuration per port */
+		struct rte_eth_input_set_conf input_set_conf;
+	} info;
+};
+
+/**
+ * Hash filter information types.
+ * - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT is for getting/setting the
+ *   information/configuration of 'symmetric hash enable' per port.
+ * - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG is for getting/setting the global
+ *   configurations of hash filters. Those global configurations are valid
+ *   for all ports of the same NIC.
+ * - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT is for setting the global
+ *   hash input set fields
+ */
+enum rte_eth_hash_filter_info_type {
+	RTE_ETH_HASH_FILTER_INFO_TYPE_UNKNOWN = 0,
+	/** Symmetric hash enable per port */
+	RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT,
+	/** Configure globally for hash filter */
+	RTE_ETH_HASH_FILTER_GLOBAL_CONFIG,
+	/** Global Hash filter input set configuration */
+	RTE_ETH_HASH_FILTER_INPUT_SET_SELECT,
+	RTE_ETH_HASH_FILTER_INFO_TYPE_MAX,
+};
+
+/**
+ * Hash function types.
+ */
+enum rte_eth_hash_function {
+	RTE_ETH_HASH_FUNCTION_DEFAULT = 0,
+	RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */
+	RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */
+	RTE_ETH_HASH_FUNCTION_MAX,
+};
+
+#define RTE_SYM_HASH_MASK_ARRAY_SIZE \
+	(RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT32_BIT)/UINT32_BIT)
+/**
+ * A structure used to set or get global hash function configurations which
+ * include symmetric hash enable per flow type and hash function type.
+ * Each bit in sym_hash_enable_mask[] indicates if the symmetric hash of the
+ * corresponding flow type is enabled or not.
+ * Each bit in valid_bit_mask[] indicates if the corresponding bit in
+ * sym_hash_enable_mask[] is valid or not. For the configurations gotten, it
+ * also means if the flow type is supported by hardware or not.
+ */
+struct rte_eth_hash_global_conf {
+	enum rte_eth_hash_function hash_func; /**< Hash function type */
+	/** Bit mask for symmetric hash enable per flow type */
+	uint32_t sym_hash_enable_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE];
+	/** Bit mask indicates if the corresponding bit is valid */
+	uint32_t valid_bit_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE];
+};
+
+/**
+ * A structure used to set or get hash filter information, to support filter
+ * type of 'RTE_ETH_FILTER_HASH' and its operations.
+ */
+struct rte_eth_hash_filter_info {
+	enum rte_eth_hash_filter_info_type info_type; /**< Information type */
+	/** Details of hash filter information */
+	union {
+		/** For RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT */
+		uint8_t enable;
+		/** Global configurations of hash filter */
+		struct rte_eth_hash_global_conf global_conf;
+		/** Global configurations of hash filter input set */
+		struct rte_eth_input_set_conf input_set_conf;
+	} info;
+};
+
+/**
+ * l2 tunnel configuration.
+ */
+struct rte_eth_l2_tunnel_conf {
+	enum rte_eth_tunnel_type l2_tunnel_type;
+	uint16_t ether_type; /* ether type in l2 header */
+	uint32_t tunnel_id; /* port tag id for e-tag */
+	uint16_t vf_id; /* VF id for tag insertion */
+	uint32_t pool; /* destination pool for tag based forwarding */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETH_CTRL_H_ */
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
new file mode 100644
index 00000000..a31018e8
--- /dev/null
+++ b/lib/librte_ether/rte_ethdev.c
@@ -0,0 +1,3371 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+
+#include "rte_ether.h"
+#include "rte_ethdev.h"
+
+static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
+struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
+static struct rte_eth_dev_data *rte_eth_dev_data;
+static uint8_t nb_ports;
+
+/* spinlock for eth device callbacks */
+static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* store statistics names and its offset in stats structure  */
+struct rte_eth_xstats_name_off {
+	char name[RTE_ETH_XSTATS_NAME_SIZE];
+	unsigned offset;
+};
+
+static const struct rte_eth_xstats_name_off rte_stats_strings[] = {
+	{"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)},
+	{"tx_good_packets", offsetof(struct rte_eth_stats, opackets)},
+	{"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)},
+	{"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)},
+	{"rx_errors", offsetof(struct rte_eth_stats, ierrors)},
+	{"tx_errors", offsetof(struct rte_eth_stats, oerrors)},
+	{"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats,
+		rx_nombuf)},
+};
+
+#define RTE_NB_STATS (sizeof(rte_stats_strings) / sizeof(rte_stats_strings[0]))
+
+static const struct rte_eth_xstats_name_off rte_rxq_stats_strings[] = {
+	{"packets", offsetof(struct rte_eth_stats, q_ipackets)},
+	{"bytes", offsetof(struct rte_eth_stats, q_ibytes)},
+	{"errors", offsetof(struct rte_eth_stats, q_errors)},
+};
+
+#define RTE_NB_RXQ_STATS (sizeof(rte_rxq_stats_strings) /	\
+		sizeof(rte_rxq_stats_strings[0]))
+
+static const struct rte_eth_xstats_name_off rte_txq_stats_strings[] = {
+	{"packets", offsetof(struct rte_eth_stats, q_opackets)},
+	{"bytes", offsetof(struct rte_eth_stats, q_obytes)},
+};
+#define RTE_NB_TXQ_STATS (sizeof(rte_txq_stats_strings) /	\
+		sizeof(rte_txq_stats_strings[0]))
+
+
+/**
+ * The user application callback description.
+ *
+ * It contains callback address to be registered by user application,
+ * the pointer to the parameters for callback, and the event type.
+ */
+struct rte_eth_dev_callback {
+	TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */
+	rte_eth_dev_cb_fn cb_fn;                /**< Callback address */
+	void *cb_arg;                           /**< Parameter for callback */
+	enum rte_eth_event_type event;          /**< Interrupt event type */
+	uint32_t active;                        /**< Callback is executing */
+};
+
+enum {
+	STAT_QMAP_TX = 0,
+	STAT_QMAP_RX
+};
+
+enum {
+	DEV_DETACHED = 0,
+	DEV_ATTACHED
+};
+
+static void
+rte_eth_dev_data_alloc(void)
+{
+	const unsigned flags = 0;
+	const struct rte_memzone *mz;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA,
+				RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data),
+				rte_socket_id(), flags);
+	} else
+		mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA);
+	if (mz == NULL)
+		rte_panic("Cannot allocate memzone for ethernet port data\n");
+
+	rte_eth_dev_data = mz->addr;
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		memset(rte_eth_dev_data, 0,
+				RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data));
+}
+
+struct rte_eth_dev *
+rte_eth_dev_allocated(const char *name)
+{
+	unsigned i;
+
+	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+		if ((rte_eth_devices[i].attached == DEV_ATTACHED) &&
+		    strcmp(rte_eth_devices[i].data->name, name) == 0)
+			return &rte_eth_devices[i];
+	}
+	return NULL;
+}
+
+static uint8_t
+rte_eth_dev_find_free_port(void)
+{
+	unsigned i;
+
+	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+		if (rte_eth_devices[i].attached == DEV_DETACHED)
+			return i;
+	}
+	return RTE_MAX_ETHPORTS;
+}
+
+struct rte_eth_dev *
+rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
+{
+	uint8_t port_id;
+	struct rte_eth_dev *eth_dev;
+
+	port_id = rte_eth_dev_find_free_port();
+	if (port_id == RTE_MAX_ETHPORTS) {
+		RTE_PMD_DEBUG_TRACE("Reached maximum number of Ethernet ports\n");
+		return NULL;
+	}
+
+	if (rte_eth_dev_data == NULL)
+		rte_eth_dev_data_alloc();
+
+	if (rte_eth_dev_allocated(name) != NULL) {
+		RTE_PMD_DEBUG_TRACE("Ethernet Device with name %s already allocated!\n",
+				name);
+		return NULL;
+	}
+
+	eth_dev = &rte_eth_devices[port_id];
+	eth_dev->data = &rte_eth_dev_data[port_id];
+	snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
+	eth_dev->data->port_id = port_id;
+	eth_dev->attached = DEV_ATTACHED;
+	eth_dev->dev_type = type;
+	nb_ports++;
+	return eth_dev;
+}
+
+static int
+rte_eth_dev_create_unique_device_name(char *name, size_t size,
+		struct rte_pci_device *pci_dev)
+{
+	int ret;
+
+	ret = snprintf(name, size, "%d:%d.%d",
+			pci_dev->addr.bus, pci_dev->addr.devid,
+			pci_dev->addr.function);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int
+rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
+{
+	if (eth_dev == NULL)
+		return -EINVAL;
+
+	eth_dev->attached = DEV_DETACHED;
+	nb_ports--;
+	return 0;
+}
+
+static int
+rte_eth_dev_init(struct rte_pci_driver *pci_drv,
+		 struct rte_pci_device *pci_dev)
+{
+	struct eth_driver    *eth_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+
+	int diag;
+
+	eth_drv = (struct eth_driver *)pci_drv;
+
+	/* Create unique Ethernet device name using PCI address */
+	rte_eth_dev_create_unique_device_name(ethdev_name,
+			sizeof(ethdev_name), pci_dev);
+
+	eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
+	if (eth_dev == NULL)
+		return -ENOMEM;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+				  eth_drv->dev_private_size,
+				  RTE_CACHE_LINE_SIZE);
+		if (eth_dev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+	eth_dev->pci_dev = pci_dev;
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	/* init user callbacks */
+	TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+	/*
+	 * Set the default MTU.
+	 */
+	eth_dev->data->mtu = ETHER_MTU;
+
+	/* Invoke PMD device initialization function */
+	diag = (*eth_drv->eth_dev_init)(eth_dev);
+	if (diag == 0)
+		return 0;
+
+	RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x) failed\n",
+			pci_drv->name,
+			(unsigned) pci_dev->id.vendor_id,
+			(unsigned) pci_dev->id.device_id);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+	rte_eth_dev_release_port(eth_dev);
+	return diag;
+}
+
+static int
+rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
+{
+	const struct eth_driver *eth_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int ret;
+
+	if (pci_dev == NULL)
+		return -EINVAL;
+
+	/* Create unique Ethernet device name using PCI address */
+	rte_eth_dev_create_unique_device_name(ethdev_name,
+			sizeof(ethdev_name), pci_dev);
+
+	eth_dev = rte_eth_dev_allocated(ethdev_name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	eth_drv = (const struct eth_driver *)pci_dev->driver;
+
+	/* Invoke PMD device uninit function */
+	if (*eth_drv->eth_dev_uninit) {
+		ret = (*eth_drv->eth_dev_uninit)(eth_dev);
+		if (ret)
+			return ret;
+	}
+
+	/* free ether device */
+	rte_eth_dev_release_port(eth_dev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+
+	eth_dev->pci_dev = NULL;
+	eth_dev->driver = NULL;
+	eth_dev->data = NULL;
+
+	return 0;
+}
+
+/**
+ * Register an Ethernet [Poll Mode] driver.
+ *
+ * Function invoked by the initialization function of an Ethernet driver
+ * to simultaneously register itself as a PCI driver and as an Ethernet
+ * Poll Mode Driver.
+ * Invokes the rte_eal_pci_register() function to register the *pci_drv*
+ * structure embedded in the *eth_drv* structure, after having stored the
+ * address of the rte_eth_dev_init() function in the *devinit* field of
+ * the *pci_drv* structure.
+ * During the PCI probing phase, the rte_eth_dev_init() function is
+ * invoked for each PCI [Ethernet device] matching the embedded PCI
+ * identifiers provided by the driver.
+ */
+void
+rte_eth_driver_register(struct eth_driver *eth_drv)
+{
+	eth_drv->pci_drv.devinit = rte_eth_dev_init;
+	eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
+	rte_eal_pci_register(&eth_drv->pci_drv);
+}
+
+int
+rte_eth_dev_is_valid_port(uint8_t port_id)
+{
+	if (port_id >= RTE_MAX_ETHPORTS ||
+	    rte_eth_devices[port_id].attached != DEV_ATTACHED)
+		return 0;
+	else
+		return 1;
+}
+
+int
+rte_eth_dev_socket_id(uint8_t port_id)
+{
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -1;
+	return rte_eth_devices[port_id].data->numa_node;
+}
+
+uint8_t
+rte_eth_dev_count(void)
+{
+	return nb_ports;
+}
+
+static enum rte_eth_dev_type
+rte_eth_dev_get_device_type(uint8_t port_id)
+{
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return RTE_ETH_DEV_UNKNOWN;
+	return rte_eth_devices[port_id].dev_type;
+}
+
+static int
+rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr)
+{
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	if (addr == NULL) {
+		RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+		return -EINVAL;
+	}
+
+	*addr = rte_eth_devices[port_id].pci_dev->addr;
+	return 0;
+}
+
+static int
+rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
+{
+	char *tmp;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	if (name == NULL) {
+		RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+		return -EINVAL;
+	}
+
+	/* shouldn't check 'rte_eth_devices[i].data',
+	 * because it might be overwritten by VDEV PMD */
+	tmp = rte_eth_dev_data[port_id].name;
+	strcpy(name, tmp);
+	return 0;
+}
+
+static int
+rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
+{
+	int i;
+
+	if (name == NULL) {
+		RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+		return -EINVAL;
+	}
+
+	*port_id = RTE_MAX_ETHPORTS;
+
+	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+
+		if (!strncmp(name,
+			rte_eth_dev_data[i].name, strlen(name))) {
+
+			*port_id = i;
+
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+static int
+rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id)
+{
+	int i;
+	struct rte_pci_device *pci_dev = NULL;
+
+	if (addr == NULL) {
+		RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+		return -EINVAL;
+	}
+
+	*port_id = RTE_MAX_ETHPORTS;
+
+	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+
+		pci_dev = rte_eth_devices[i].pci_dev;
+
+		if (pci_dev &&
+			!rte_eal_compare_pci_addr(&pci_dev->addr, addr)) {
+
+			*port_id = i;
+
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+static int
+rte_eth_dev_is_detachable(uint8_t port_id)
+{
+	uint32_t dev_flags;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -EINVAL;
+	}
+
+	switch (rte_eth_devices[port_id].data->kdrv) {
+	case RTE_KDRV_IGB_UIO:
+	case RTE_KDRV_UIO_GENERIC:
+	case RTE_KDRV_NIC_UIO:
+	case RTE_KDRV_NONE:
+		break;
+	case RTE_KDRV_VFIO:
+	default:
+		return -ENOTSUP;
+	}
+	dev_flags = rte_eth_devices[port_id].data->dev_flags;
+	if ((dev_flags & RTE_ETH_DEV_DETACHABLE) &&
+		(!(dev_flags & RTE_ETH_DEV_BONDED_SLAVE)))
+		return 0;
+	else
+		return 1;
+}
+
+/* attach the new physical device, then store port_id of the device */
+static int
+rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id)
+{
+	/* re-construct pci_device_list */
+	if (rte_eal_pci_scan())
+		goto err;
+	/* Invoke probe func of the driver can handle the new device. */
+	if (rte_eal_pci_probe_one(addr))
+		goto err;
+
+	if (rte_eth_dev_get_port_by_addr(addr, port_id))
+		goto err;
+
+	return 0;
+err:
+	return -1;
+}
+
+/* detach the new physical device, then store pci_addr of the device */
+static int
+rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr)
+{
+	struct rte_pci_addr freed_addr;
+	struct rte_pci_addr vp;
+
+	/* get pci address by port id */
+	if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr))
+		goto err;
+
+	/* Zeroed pci addr means the port comes from virtual device */
+	vp.domain = vp.bus = vp.devid = vp.function = 0;
+	if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0)
+		goto err;
+
+	/* invoke devuninit func of the pci driver,
+	 * also remove the device from pci_device_list */
+	if (rte_eal_pci_detach(&freed_addr))
+		goto err;
+
+	*addr = freed_addr;
+	return 0;
+err:
+	return -1;
+}
+
+/* attach the new virtual device, then store port_id of the device */
+static int
+rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id)
+{
+	char *name = NULL, *args = NULL;
+	int ret = -1;
+
+	/* parse vdevargs, then retrieve device name and args */
+	if (rte_eal_parse_devargs_str(vdevargs, &name, &args))
+		goto end;
+
+	/* walk around dev_driver_list to find the driver of the device,
+	 * then invoke probe function of the driver.
+	 * rte_eal_vdev_init() updates port_id allocated after
+	 * initialization.
+	 */
+	if (rte_eal_vdev_init(name, args))
+		goto end;
+
+	if (rte_eth_dev_get_port_by_name(name, port_id))
+		goto end;
+
+	ret = 0;
+end:
+	free(name);
+	free(args);
+
+	return ret;
+}
+
+/* detach the new virtual device, then store the name of the device */
+static int
+rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname)
+{
+	char name[RTE_ETH_NAME_MAX_LEN];
+
+	/* get device name by port id */
+	if (rte_eth_dev_get_name_by_port(port_id, name))
+		goto err;
+	/* walk around dev_driver_list to find the driver of the device,
+	 * then invoke uninit function of the driver */
+	if (rte_eal_vdev_uninit(name))
+		goto err;
+
+	strncpy(vdevname, name, sizeof(name));
+	return 0;
+err:
+	return -1;
+}
+
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
+{
+	struct rte_pci_addr addr;
+	int ret = -1;
+
+	if ((devargs == NULL) || (port_id == NULL)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (eal_parse_pci_DomBDF(devargs, &addr) == 0) {
+		ret = rte_eth_dev_attach_pdev(&addr, port_id);
+		if (ret < 0)
+			goto err;
+	} else {
+		ret = rte_eth_dev_attach_vdev(devargs, port_id);
+		if (ret < 0)
+			goto err;
+	}
+
+	return 0;
+err:
+	RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n");
+	return ret;
+}
+
+/* detach the device, then store the name of the device */
+int
+rte_eth_dev_detach(uint8_t port_id, char *name)
+{
+	struct rte_pci_addr addr;
+	int ret = -1;
+
+	if (name == NULL) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* check whether the driver supports detach feature, or not */
+	if (rte_eth_dev_is_detachable(port_id))
+		goto err;
+
+	if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) {
+		ret = rte_eth_dev_get_addr_by_port(port_id, &addr);
+		if (ret < 0)
+			goto err;
+
+		ret = rte_eth_dev_detach_pdev(port_id, &addr);
+		if (ret < 0)
+			goto err;
+
+		snprintf(name, RTE_ETH_NAME_MAX_LEN,
+			"%04x:%02x:%02x.%d",
+			addr.domain, addr.bus,
+			addr.devid, addr.function);
+	} else {
+		ret = rte_eth_dev_detach_vdev(port_id, name);
+		if (ret < 0)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n");
+	return ret;
+}
+
+static int
+rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
+{
+	uint16_t old_nb_queues = dev->data->nb_rx_queues;
+	void **rxq;
+	unsigned i;
+
+	if (dev->data->rx_queues == NULL && nb_queues != 0) { /* first time configuration */
+		dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
+				sizeof(dev->data->rx_queues[0]) * nb_queues,
+				RTE_CACHE_LINE_SIZE);
+		if (dev->data->rx_queues == NULL) {
+			dev->data->nb_rx_queues = 0;
+			return -(ENOMEM);
+		}
+	} else if (dev->data->rx_queues != NULL && nb_queues != 0) { /* re-configure */
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);
+
+		rxq = dev->data->rx_queues;
+
+		for (i = nb_queues; i < old_nb_queues; i++)
+			(*dev->dev_ops->rx_queue_release)(rxq[i]);
+		rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues,
+				RTE_CACHE_LINE_SIZE);
+		if (rxq == NULL)
+			return -(ENOMEM);
+		if (nb_queues > old_nb_queues) {
+			uint16_t new_qs = nb_queues - old_nb_queues;
+
+			memset(rxq + old_nb_queues, 0,
+				sizeof(rxq[0]) * new_qs);
+		}
+
+		dev->data->rx_queues = rxq;
+
+	} else if (dev->data->rx_queues != NULL && nb_queues == 0) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP);
+
+		rxq = dev->data->rx_queues;
+
+		for (i = nb_queues; i < old_nb_queues; i++)
+			(*dev->dev_ops->rx_queue_release)(rxq[i]);
+	}
+	dev->data->nb_rx_queues = nb_queues;
+	return 0;
+}
+
+int
+rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (rx_queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
+
+	if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
+		RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
+			" already started\n",
+			rx_queue_id, port_id);
+		return 0;
+	}
+
+	return dev->dev_ops->rx_queue_start(dev, rx_queue_id);
+
+}
+
+int
+rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (rx_queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP);
+
+	if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
+		RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
+			" already stopped\n",
+			rx_queue_id, port_id);
+		return 0;
+	}
+
+	return dev->dev_ops->rx_queue_stop(dev, rx_queue_id);
+
+}
+
+int
+rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (tx_queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP);
+
+	if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
+		RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
+			" already started\n",
+			tx_queue_id, port_id);
+		return 0;
+	}
+
+	return dev->dev_ops->tx_queue_start(dev, tx_queue_id);
+
+}
+
+int
+rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (tx_queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP);
+
+	if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
+		RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
+			" already stopped\n",
+			tx_queue_id, port_id);
+		return 0;
+	}
+
+	return dev->dev_ops->tx_queue_stop(dev, tx_queue_id);
+
+}
+
+static int
+rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
+{
+	uint16_t old_nb_queues = dev->data->nb_tx_queues;
+	void **txq;
+	unsigned i;
+
+	if (dev->data->tx_queues == NULL && nb_queues != 0) { /* first time configuration */
+		dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
+						   sizeof(dev->data->tx_queues[0]) * nb_queues,
+						   RTE_CACHE_LINE_SIZE);
+		if (dev->data->tx_queues == NULL) {
+			dev->data->nb_tx_queues = 0;
+			return -(ENOMEM);
+		}
+	} else if (dev->data->tx_queues != NULL && nb_queues != 0) { /* re-configure */
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);
+
+		txq = dev->data->tx_queues;
+
+		for (i = nb_queues; i < old_nb_queues; i++)
+			(*dev->dev_ops->tx_queue_release)(txq[i]);
+		txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues,
+				  RTE_CACHE_LINE_SIZE);
+		if (txq == NULL)
+			return -ENOMEM;
+		if (nb_queues > old_nb_queues) {
+			uint16_t new_qs = nb_queues - old_nb_queues;
+
+			memset(txq + old_nb_queues, 0,
+			       sizeof(txq[0]) * new_qs);
+		}
+
+		dev->data->tx_queues = txq;
+
+	} else if (dev->data->tx_queues != NULL && nb_queues == 0) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP);
+
+		txq = dev->data->tx_queues;
+
+		for (i = nb_queues; i < old_nb_queues; i++)
+			(*dev->dev_ops->tx_queue_release)(txq[i]);
+	}
+	dev->data->nb_tx_queues = nb_queues;
+	return 0;
+}
+
+uint32_t
+rte_eth_speed_bitflag(uint32_t speed, int duplex)
+{
+	switch (speed) {
+	case ETH_SPEED_NUM_10M:
+		return duplex ? ETH_LINK_SPEED_10M : ETH_LINK_SPEED_10M_HD;
+	case ETH_SPEED_NUM_100M:
+		return duplex ? ETH_LINK_SPEED_100M : ETH_LINK_SPEED_100M_HD;
+	case ETH_SPEED_NUM_1G:
+		return ETH_LINK_SPEED_1G;
+	case ETH_SPEED_NUM_2_5G:
+		return ETH_LINK_SPEED_2_5G;
+	case ETH_SPEED_NUM_5G:
+		return ETH_LINK_SPEED_5G;
+	case ETH_SPEED_NUM_10G:
+		return ETH_LINK_SPEED_10G;
+	case ETH_SPEED_NUM_20G:
+		return ETH_LINK_SPEED_20G;
+	case ETH_SPEED_NUM_25G:
+		return ETH_LINK_SPEED_25G;
+	case ETH_SPEED_NUM_40G:
+		return ETH_LINK_SPEED_40G;
+	case ETH_SPEED_NUM_50G:
+		return ETH_LINK_SPEED_50G;
+	case ETH_SPEED_NUM_56G:
+		return ETH_LINK_SPEED_56G;
+	case ETH_SPEED_NUM_100G:
+		return ETH_LINK_SPEED_100G;
+	default:
+		return 0;
+	}
+}
+
+int
+rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
+		      const struct rte_eth_conf *dev_conf)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+	int diag;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) {
+		RTE_PMD_DEBUG_TRACE(
+			"Number of RX queues requested (%u) is greater than max supported(%d)\n",
+			nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
+		return -EINVAL;
+	}
+
+	if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
+		RTE_PMD_DEBUG_TRACE(
+			"Number of TX queues requested (%u) is greater than max supported(%d)\n",
+			nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);
+
+	if (dev->data->dev_started) {
+		RTE_PMD_DEBUG_TRACE(
+		    "port %d must be stopped to allow configuration\n", port_id);
+		return -EBUSY;
+	}
+
+	/* Copy the dev_conf parameter into the dev structure */
+	memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf));
+
+	/*
+	 * Check that the numbers of RX and TX queues are not greater
+	 * than the maximum number of RX and TX queues supported by the
+	 * configured device.
+	 */
+	(*dev->dev_ops->dev_infos_get)(dev, &dev_info);
+
+	if (nb_rx_q == 0 && nb_tx_q == 0) {
+		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d both rx and tx queue cannot be 0\n", port_id);
+		return -EINVAL;
+	}
+
+	if (nb_rx_q > dev_info.max_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n",
+				port_id, nb_rx_q, dev_info.max_rx_queues);
+		return -EINVAL;
+	}
+
+	if (nb_tx_q > dev_info.max_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n",
+				port_id, nb_tx_q, dev_info.max_tx_queues);
+		return -EINVAL;
+	}
+
+	/*
+	 * If link state interrupt is enabled, check that the
+	 * device supports it.
+	 */
+	if ((dev_conf->intr_conf.lsc == 1) &&
+		(!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
+			RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n",
+					dev->data->drv_name);
+			return -EINVAL;
+	}
+
+	/*
+	 * If jumbo frames are enabled, check that the maximum RX packet
+	 * length is supported by the configured device.
+	 */
+	if (dev_conf->rxmode.jumbo_frame == 1) {
+		if (dev_conf->rxmode.max_rx_pkt_len >
+		    dev_info.max_rx_pktlen) {
+			RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
+				" > max valid value %u\n",
+				port_id,
+				(unsigned)dev_conf->rxmode.max_rx_pkt_len,
+				(unsigned)dev_info.max_rx_pktlen);
+			return -EINVAL;
+		} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
+			RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
+				" < min valid value %u\n",
+				port_id,
+				(unsigned)dev_conf->rxmode.max_rx_pkt_len,
+				(unsigned)ETHER_MIN_LEN);
+			return -EINVAL;
+		}
+	} else {
+		if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN ||
+			dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN)
+			/* Use default value */
+			dev->data->dev_conf.rxmode.max_rx_pkt_len =
+							ETHER_MAX_LEN;
+	}
+
+	/*
+	 * Setup new number of RX/TX queues and reconfigure device.
+	 */
+	diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q);
+	if (diag != 0) {
+		RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n",
+				port_id, diag);
+		return diag;
+	}
+
+	diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
+	if (diag != 0) {
+		RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n",
+				port_id, diag);
+		rte_eth_dev_rx_queue_config(dev, 0);
+		return diag;
+	}
+
+	diag = (*dev->dev_ops->dev_configure)(dev);
+	if (diag != 0) {
+		RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n",
+				port_id, diag);
+		rte_eth_dev_rx_queue_config(dev, 0);
+		rte_eth_dev_tx_queue_config(dev, 0);
+		return diag;
+	}
+
+	return 0;
+}
+
+static void
+rte_eth_dev_config_restore(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+	struct ether_addr addr;
+	uint16_t i;
+	uint32_t pool = 0;
+
+	dev = &rte_eth_devices[port_id];
+
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	if (RTE_ETH_DEV_SRIOV(dev).active)
+		pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx;
+
+	/* replay MAC address configuration */
+	for (i = 0; i < dev_info.max_mac_addrs; i++) {
+		addr = dev->data->mac_addrs[i];
+
+		/* skip zero address */
+		if (is_zero_ether_addr(&addr))
+			continue;
+
+		/* add address to the hardware */
+		if  (*dev->dev_ops->mac_addr_add &&
+			(dev->data->mac_pool_sel[i] & (1ULL << pool)))
+			(*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool);
+		else {
+			RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n",
+					port_id);
+			/* exit the loop but not return an error */
+			break;
+		}
+	}
+
+	/* replay promiscuous configuration */
+	if (rte_eth_promiscuous_get(port_id) == 1)
+		rte_eth_promiscuous_enable(port_id);
+	else if (rte_eth_promiscuous_get(port_id) == 0)
+		rte_eth_promiscuous_disable(port_id);
+
+	/* replay all multicast configuration */
+	if (rte_eth_allmulticast_get(port_id) == 1)
+		rte_eth_allmulticast_enable(port_id);
+	else if (rte_eth_allmulticast_get(port_id) == 0)
+		rte_eth_allmulticast_disable(port_id);
+}
+
+int
+rte_eth_dev_start(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+	int diag;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
+
+	if (dev->data->dev_started != 0) {
+		RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8
+			" already started\n",
+			port_id);
+		return 0;
+	}
+
+	diag = (*dev->dev_ops->dev_start)(dev);
+	if (diag == 0)
+		dev->data->dev_started = 1;
+	else
+		return diag;
+
+	rte_eth_dev_config_restore(port_id);
+
+	if (dev->data->dev_conf.intr_conf.lsc == 0) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP);
+		(*dev->dev_ops->link_update)(dev, 0);
+	}
+	return 0;
+}
+
+void
+rte_eth_dev_stop(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
+
+	if (dev->data->dev_started == 0) {
+		RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8
+			" already stopped\n",
+			port_id);
+		return;
+	}
+
+	dev->data->dev_started = 0;
+	(*dev->dev_ops->dev_stop)(dev);
+}
+
+int
+rte_eth_dev_set_link_up(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_up, -ENOTSUP);
+	return (*dev->dev_ops->dev_set_link_up)(dev);
+}
+
+int
+rte_eth_dev_set_link_down(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_down, -ENOTSUP);
+	return (*dev->dev_ops->dev_set_link_down)(dev);
+}
+
+void
+rte_eth_dev_close(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_close);
+	dev->data->dev_started = 0;
+	(*dev->dev_ops->dev_close)(dev);
+
+	rte_free(dev->data->rx_queues);
+	dev->data->rx_queues = NULL;
+	rte_free(dev->data->tx_queues);
+	dev->data->tx_queues = NULL;
+}
+
+int
+rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
+		       uint16_t nb_rx_desc, unsigned int socket_id,
+		       const struct rte_eth_rxconf *rx_conf,
+		       struct rte_mempool *mp)
+{
+	int ret;
+	uint32_t mbp_buf_size;
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (rx_queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->dev_started) {
+		RTE_PMD_DEBUG_TRACE(
+		    "port %d must be stopped to allow configuration\n", port_id);
+		return -EBUSY;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP);
+
+	/*
+	 * Check the size of the mbuf data buffer.
+	 * This value must be provided in the private data of the memory pool.
+	 * First check that the memory pool has a valid private data.
+	 */
+	rte_eth_dev_info_get(port_id, &dev_info);
+	if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
+		RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n",
+				mp->name, (int) mp->private_data_size,
+				(int) sizeof(struct rte_pktmbuf_pool_private));
+		return -ENOSPC;
+	}
+	mbp_buf_size = rte_pktmbuf_data_room_size(mp);
+
+	if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
+		RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
+				"(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)"
+				"=%d)\n",
+				mp->name,
+				(int)mbp_buf_size,
+				(int)(RTE_PKTMBUF_HEADROOM +
+				      dev_info.min_rx_bufsize),
+				(int)RTE_PKTMBUF_HEADROOM,
+				(int)dev_info.min_rx_bufsize);
+		return -EINVAL;
+	}
+
+	if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
+			nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
+			nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
+
+		RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), "
+			"should be: <= %hu, = %hu, and a product of %hu\n",
+			nb_rx_desc,
+			dev_info.rx_desc_lim.nb_max,
+			dev_info.rx_desc_lim.nb_min,
+			dev_info.rx_desc_lim.nb_align);
+		return -EINVAL;
+	}
+
+	if (rx_conf == NULL)
+		rx_conf = &dev_info.default_rxconf;
+
+	ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
+					      socket_id, rx_conf, mp);
+	if (!ret) {
+		if (!dev->data->min_rx_buf_size ||
+		    dev->data->min_rx_buf_size > mbp_buf_size)
+			dev->data->min_rx_buf_size = mbp_buf_size;
+	}
+
+	return ret;
+}
+
+int
+rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
+		       uint16_t nb_tx_desc, unsigned int socket_id,
+		       const struct rte_eth_txconf *tx_conf)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (tx_queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+		return -EINVAL;
+	}
+
+	if (dev->data->dev_started) {
+		RTE_PMD_DEBUG_TRACE(
+		    "port %d must be stopped to allow configuration\n", port_id);
+		return -EBUSY;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP);
+
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
+	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
+	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
+		RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), "
+				"should be: <= %hu, = %hu, and a product of %hu\n",
+				nb_tx_desc,
+				dev_info.tx_desc_lim.nb_max,
+				dev_info.tx_desc_lim.nb_min,
+				dev_info.tx_desc_lim.nb_align);
+		return -EINVAL;
+	}
+
+	if (tx_conf == NULL)
+		tx_conf = &dev_info.default_txconf;
+
+	return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
+					       socket_id, tx_conf);
+}
+
+void
+rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
+		void *userdata __rte_unused)
+{
+	unsigned i;
+
+	for (i = 0; i < unsent; i++)
+		rte_pktmbuf_free(pkts[i]);
+}
+
+void
+rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
+		void *userdata)
+{
+	uint64_t *count = userdata;
+	unsigned i;
+
+	for (i = 0; i < unsent; i++)
+		rte_pktmbuf_free(pkts[i]);
+
+	*count += unsent;
+}
+
+int
+rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
+		buffer_tx_error_fn cbfn, void *userdata)
+{
+	buffer->error_callback = cbfn;
+	buffer->error_userdata = userdata;
+	return 0;
+}
+
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size)
+{
+	int ret = 0;
+
+	if (buffer == NULL)
+		return -EINVAL;
+
+	buffer->size = size;
+	if (buffer->error_callback == NULL) {
+		ret = rte_eth_tx_buffer_set_err_callback(
+			buffer, rte_eth_tx_buffer_drop_callback, NULL);
+	}
+
+	return ret;
+}
+
+void
+rte_eth_promiscuous_enable(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_enable);
+	(*dev->dev_ops->promiscuous_enable)(dev);
+	dev->data->promiscuous = 1;
+}
+
+void
+rte_eth_promiscuous_disable(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_disable);
+	dev->data->promiscuous = 0;
+	(*dev->dev_ops->promiscuous_disable)(dev);
+}
+
+int
+rte_eth_promiscuous_get(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	return dev->data->promiscuous;
+}
+
+void
+rte_eth_allmulticast_enable(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_enable);
+	(*dev->dev_ops->allmulticast_enable)(dev);
+	dev->data->all_multicast = 1;
+}
+
+void
+rte_eth_allmulticast_disable(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_disable);
+	dev->data->all_multicast = 0;
+	(*dev->dev_ops->allmulticast_disable)(dev);
+}
+
+int
+rte_eth_allmulticast_get(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	return dev->data->all_multicast;
+}
+
+static inline int
+rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+				struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = link;
+	struct rte_eth_link *src = &(dev->data->dev_link);
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+					*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+void
+rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	if (dev->data->dev_conf.intr_conf.lsc != 0)
+		rte_eth_dev_atomic_read_link_status(dev, eth_link);
+	else {
+		RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update);
+		(*dev->dev_ops->link_update)(dev, 1);
+		*eth_link = dev->data->dev_link;
+	}
+}
+
+void
+rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	if (dev->data->dev_conf.intr_conf.lsc != 0)
+		rte_eth_dev_atomic_read_link_status(dev, eth_link);
+	else {
+		RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update);
+		(*dev->dev_ops->link_update)(dev, 0);
+		*eth_link = dev->data->dev_link;
+	}
+}
+
+int
+rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	memset(stats, 0, sizeof(*stats));
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP);
+	(*dev->dev_ops->stats_get)(dev, stats);
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+void
+rte_eth_stats_reset(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset);
+	(*dev->dev_ops->stats_reset)(dev);
+	dev->data->rx_mbuf_alloc_failed = 0;
+}
+
+/* retrieve ethdev extended statistics */
+int
+rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
+	unsigned n)
+{
+	struct rte_eth_stats eth_stats;
+	struct rte_eth_dev *dev;
+	unsigned count = 0, i, q;
+	signed xcount = 0;
+	uint64_t val, *stats_ptr;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+
+	/* Return generic statistics */
+	count = RTE_NB_STATS + (dev->data->nb_rx_queues * RTE_NB_RXQ_STATS) +
+		(dev->data->nb_tx_queues * RTE_NB_TXQ_STATS);
+
+	/* implemented by the driver */
+	if (dev->dev_ops->xstats_get != NULL) {
+		/* Retrieve the xstats from the driver at the end of the
+		 * xstats struct.
+		 */
+		xcount = (*dev->dev_ops->xstats_get)(dev,
+				     xstats ? xstats + count : NULL,
+				     (n > count) ? n - count : 0);
+
+		if (xcount < 0)
+			return xcount;
+	}
+
+	if (n < count + xcount || xstats == NULL)
+		return count + xcount;
+
+	/* now fill the xstats structure */
+	count = 0;
+	rte_eth_stats_get(port_id, &eth_stats);
+
+	/* global stats */
+	for (i = 0; i < RTE_NB_STATS; i++) {
+		stats_ptr = RTE_PTR_ADD(&eth_stats,
+					rte_stats_strings[i].offset);
+		val = *stats_ptr;
+		snprintf(xstats[count].name, sizeof(xstats[count].name),
+			"%s", rte_stats_strings[i].name);
+		xstats[count++].value = val;
+	}
+
+	/* per-rxq stats */
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		for (i = 0; i < RTE_NB_RXQ_STATS; i++) {
+			stats_ptr = RTE_PTR_ADD(&eth_stats,
+					rte_rxq_stats_strings[i].offset +
+					q * sizeof(uint64_t));
+			val = *stats_ptr;
+			snprintf(xstats[count].name, sizeof(xstats[count].name),
+				"rx_q%u_%s", q,
+				rte_rxq_stats_strings[i].name);
+			xstats[count++].value = val;
+		}
+	}
+
+	/* per-txq stats */
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		for (i = 0; i < RTE_NB_TXQ_STATS; i++) {
+			stats_ptr = RTE_PTR_ADD(&eth_stats,
+					rte_txq_stats_strings[i].offset +
+					q * sizeof(uint64_t));
+			val = *stats_ptr;
+			snprintf(xstats[count].name, sizeof(xstats[count].name),
+				"tx_q%u_%s", q,
+				rte_txq_stats_strings[i].name);
+			xstats[count++].value = val;
+		}
+	}
+
+	return count + xcount;
+}
+
+/* reset ethdev extended statistics */
+void
+rte_eth_xstats_reset(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	/* implemented by the driver */
+	if (dev->dev_ops->xstats_reset != NULL) {
+		(*dev->dev_ops->xstats_reset)(dev);
+		return;
+	}
+
+	/* fallback to default */
+	rte_eth_stats_reset(port_id);
+}
+
+static int
+set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx,
+		uint8_t is_rx)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP);
+	return (*dev->dev_ops->queue_stats_mapping_set)
+			(dev, queue_id, stat_idx, is_rx);
+}
+
+
+int
+rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id,
+		uint8_t stat_idx)
+{
+	return set_queue_stats_mapping(port_id, tx_queue_id, stat_idx,
+			STAT_QMAP_TX);
+}
+
+
+int
+rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
+		uint8_t stat_idx)
+{
+	return set_queue_stats_mapping(port_id, rx_queue_id, stat_idx,
+			STAT_QMAP_RX);
+}
+
+
+void
+rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
+{
+	struct rte_eth_dev *dev;
+	const struct rte_eth_desc_lim lim = {
+		.nb_max = UINT16_MAX,
+		.nb_min = 0,
+		.nb_align = 1,
+	};
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+
+	memset(dev_info, 0, sizeof(struct rte_eth_dev_info));
+	dev_info->rx_desc_lim = lim;
+	dev_info->tx_desc_lim = lim;
+
+	RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
+	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
+	dev_info->pci_dev = dev->pci_dev;
+	dev_info->driver_name = dev->data->drv_name;
+}
+
+int
+rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask,
+				 uint32_t *ptypes, int num)
+{
+	int i, j;
+	struct rte_eth_dev *dev;
+	const uint32_t *all_ptypes;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_supported_ptypes_get, 0);
+	all_ptypes = (*dev->dev_ops->dev_supported_ptypes_get)(dev);
+
+	if (!all_ptypes)
+		return 0;
+
+	for (i = 0, j = 0; all_ptypes[i] != RTE_PTYPE_UNKNOWN; ++i)
+		if (all_ptypes[i] & ptype_mask) {
+			if (j < num)
+				ptypes[j] = all_ptypes[i];
+			j++;
+		}
+
+	return j;
+}
+
+void
+rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_RET(port_id);
+	dev = &rte_eth_devices[port_id];
+	ether_addr_copy(&dev->data->mac_addrs[0], mac_addr);
+}
+
+
+int
+rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	*mtu = dev->data->mtu;
+	return 0;
+}
+
+int
+rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu)
+{
+	int ret;
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP);
+
+	ret = (*dev->dev_ops->mtu_set)(dev, mtu);
+	if (!ret)
+		dev->data->mtu = mtu;
+
+	return ret;
+}
+
+int
+rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	if (!(dev->data->dev_conf.rxmode.hw_vlan_filter)) {
+		RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id);
+		return -ENOSYS;
+	}
+
+	if (vlan_id > 4095) {
+		RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n",
+				port_id, (unsigned) vlan_id);
+		return -EINVAL;
+	}
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);
+
+	return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+}
+
+int
+rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int on)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	if (rx_queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP);
+	(*dev->dev_ops->vlan_strip_queue_set)(dev, rx_queue_id, on);
+
+	return 0;
+}
+
+int
+rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
+				enum rte_vlan_type vlan_type,
+				uint16_t tpid)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_tpid_set, -ENOTSUP);
+
+	return (*dev->dev_ops->vlan_tpid_set)(dev, vlan_type, tpid);
+}
+
+int
+rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask)
+{
+	struct rte_eth_dev *dev;
+	int ret = 0;
+	int mask = 0;
+	int cur, org = 0;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	/*check which option changed by application*/
+	cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD);
+	org = !!(dev->data->dev_conf.rxmode.hw_vlan_strip);
+	if (cur != org) {
+		dev->data->dev_conf.rxmode.hw_vlan_strip = (uint8_t)cur;
+		mask |= ETH_VLAN_STRIP_MASK;
+	}
+
+	cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD);
+	org = !!(dev->data->dev_conf.rxmode.hw_vlan_filter);
+	if (cur != org) {
+		dev->data->dev_conf.rxmode.hw_vlan_filter = (uint8_t)cur;
+		mask |= ETH_VLAN_FILTER_MASK;
+	}
+
+	cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD);
+	org = !!(dev->data->dev_conf.rxmode.hw_vlan_extend);
+	if (cur != org) {
+		dev->data->dev_conf.rxmode.hw_vlan_extend = (uint8_t)cur;
+		mask |= ETH_VLAN_EXTEND_MASK;
+	}
+
+	/*no change*/
+	if (mask == 0)
+		return ret;
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP);
+	(*dev->dev_ops->vlan_offload_set)(dev, mask);
+
+	return ret;
+}
+
+int
+rte_eth_dev_get_vlan_offload(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+	int ret = 0;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (dev->data->dev_conf.rxmode.hw_vlan_strip)
+		ret |= ETH_VLAN_STRIP_OFFLOAD;
+
+	if (dev->data->dev_conf.rxmode.hw_vlan_filter)
+		ret |= ETH_VLAN_FILTER_OFFLOAD;
+
+	if (dev->data->dev_conf.rxmode.hw_vlan_extend)
+		ret |= ETH_VLAN_EXTEND_OFFLOAD;
+
+	return ret;
+}
+
+int
+rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_pvid_set, -ENOTSUP);
+	(*dev->dev_ops->vlan_pvid_set)(dev, pvid, on);
+
+	return 0;
+}
+
+int
+rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP);
+	memset(fc_conf, 0, sizeof(*fc_conf));
+	return (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf);
+}
+
+int
+rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) {
+		RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_set, -ENOTSUP);
+	return (*dev->dev_ops->flow_ctrl_set)(dev, fc_conf);
+}
+
+int
+rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, struct rte_eth_pfc_conf *pfc_conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) {
+		RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	/* High water, low water validation are device specific */
+	if  (*dev->dev_ops->priority_flow_ctrl_set)
+		return (*dev->dev_ops->priority_flow_ctrl_set)(dev, pfc_conf);
+	return -ENOTSUP;
+}
+
+static int
+rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf,
+			uint16_t reta_size)
+{
+	uint16_t i, num;
+
+	if (!reta_conf)
+		return -EINVAL;
+
+	if (reta_size != RTE_ALIGN(reta_size, RTE_RETA_GROUP_SIZE)) {
+		RTE_PMD_DEBUG_TRACE("Invalid reta size, should be %u aligned\n",
+							RTE_RETA_GROUP_SIZE);
+		return -EINVAL;
+	}
+
+	num = reta_size / RTE_RETA_GROUP_SIZE;
+	for (i = 0; i < num; i++) {
+		if (reta_conf[i].mask)
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int
+rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
+			 uint16_t reta_size,
+			 uint16_t max_rxq)
+{
+	uint16_t i, idx, shift;
+
+	if (!reta_conf)
+		return -EINVAL;
+
+	if (max_rxq == 0) {
+		RTE_PMD_DEBUG_TRACE("No receive queue is available\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < reta_size; i++) {
+		idx = i / RTE_RETA_GROUP_SIZE;
+		shift = i % RTE_RETA_GROUP_SIZE;
+		if ((reta_conf[idx].mask & (1ULL << shift)) &&
+			(reta_conf[idx].reta[shift] >= max_rxq)) {
+			RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds "
+				"the maximum rxq index: %u\n", idx, shift,
+				reta_conf[idx].reta[shift], max_rxq);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rss_reta_update(uint8_t port_id,
+			    struct rte_eth_rss_reta_entry64 *reta_conf,
+			    uint16_t reta_size)
+{
+	struct rte_eth_dev *dev;
+	int ret;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	/* Check mask bits */
+	ret = rte_eth_check_reta_mask(reta_conf, reta_size);
+	if (ret < 0)
+		return ret;
+
+	dev = &rte_eth_devices[port_id];
+
+	/* Check entry value */
+	ret = rte_eth_check_reta_entry(reta_conf, reta_size,
+				dev->data->nb_rx_queues);
+	if (ret < 0)
+		return ret;
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP);
+	return (*dev->dev_ops->reta_update)(dev, reta_conf, reta_size);
+}
+
+int
+rte_eth_dev_rss_reta_query(uint8_t port_id,
+			   struct rte_eth_rss_reta_entry64 *reta_conf,
+			   uint16_t reta_size)
+{
+	struct rte_eth_dev *dev;
+	int ret;
+
+	if (port_id >= nb_ports) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	/* Check mask bits */
+	ret = rte_eth_check_reta_mask(reta_conf, reta_size);
+	if (ret < 0)
+		return ret;
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_query, -ENOTSUP);
+	return (*dev->dev_ops->reta_query)(dev, reta_conf, reta_size);
+}
+
+int
+rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf)
+{
+	struct rte_eth_dev *dev;
+	uint16_t rss_hash_protos;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	rss_hash_protos = rss_conf->rss_hf;
+	if ((rss_hash_protos != 0) &&
+	    ((rss_hash_protos & ETH_RSS_PROTO_MASK) == 0)) {
+		RTE_PMD_DEBUG_TRACE("Invalid rss_hash_protos=0x%x\n",
+				rss_hash_protos);
+		return -EINVAL;
+	}
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP);
+	return (*dev->dev_ops->rss_hash_update)(dev, rss_conf);
+}
+
+int
+rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
+			      struct rte_eth_rss_conf *rss_conf)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_conf_get, -ENOTSUP);
+	return (*dev->dev_ops->rss_hash_conf_get)(dev, rss_conf);
+}
+
+int
+rte_eth_dev_udp_tunnel_port_add(uint8_t port_id,
+				struct rte_eth_udp_tunnel *udp_tunnel)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	if (udp_tunnel == NULL) {
+		RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+		return -EINVAL;
+	}
+
+	if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
+		RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_add, -ENOTSUP);
+	return (*dev->dev_ops->udp_tunnel_port_add)(dev, udp_tunnel);
+}
+
+int
+rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id,
+				   struct rte_eth_udp_tunnel *udp_tunnel)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (udp_tunnel == NULL) {
+		RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+		return -EINVAL;
+	}
+
+	if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
+		RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_del, -ENOTSUP);
+	return (*dev->dev_ops->udp_tunnel_port_del)(dev, udp_tunnel);
+}
+
+int
+rte_eth_led_on(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_on, -ENOTSUP);
+	return (*dev->dev_ops->dev_led_on)(dev);
+}
+
+int
+rte_eth_led_off(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_off, -ENOTSUP);
+	return (*dev->dev_ops->dev_led_off)(dev);
+}
+
+/*
+ * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find
+ * an empty spot.
+ */
+static int
+get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
+{
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	unsigned i;
+
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	for (i = 0; i < dev_info.max_mac_addrs; i++)
+		if (memcmp(addr, &dev->data->mac_addrs[i], ETHER_ADDR_LEN) == 0)
+			return i;
+
+	return -1;
+}
+
+static const struct ether_addr null_mac_addr;
+
+int
+rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr,
+			uint32_t pool)
+{
+	struct rte_eth_dev *dev;
+	int index;
+	uint64_t pool_mask;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP);
+
+	if (is_zero_ether_addr(addr)) {
+		RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+			port_id);
+		return -EINVAL;
+	}
+	if (pool >= ETH_64_POOLS) {
+		RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1);
+		return -EINVAL;
+	}
+
+	index = get_mac_addr_index(port_id, addr);
+	if (index < 0) {
+		index = get_mac_addr_index(port_id, &null_mac_addr);
+		if (index < 0) {
+			RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
+				port_id);
+			return -ENOSPC;
+		}
+	} else {
+		pool_mask = dev->data->mac_pool_sel[index];
+
+		/* Check if both MAC address and pool is already there, and do nothing */
+		if (pool_mask & (1ULL << pool))
+			return 0;
+	}
+
+	/* Update NIC */
+	(*dev->dev_ops->mac_addr_add)(dev, addr, index, pool);
+
+	/* Update address in NIC data structure */
+	ether_addr_copy(addr, &dev->data->mac_addrs[index]);
+
+	/* Update pool bitmap in NIC data structure */
+	dev->data->mac_pool_sel[index] |= (1ULL << pool);
+
+	return 0;
+}
+
+int
+rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr)
+{
+	struct rte_eth_dev *dev;
+	int index;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_remove, -ENOTSUP);
+
+	index = get_mac_addr_index(port_id, addr);
+	if (index == 0) {
+		RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id);
+		return -EADDRINUSE;
+	} else if (index < 0)
+		return 0;  /* Do nothing if address wasn't found */
+
+	/* Update NIC */
+	(*dev->dev_ops->mac_addr_remove)(dev, index);
+
+	/* Update address in NIC data structure */
+	ether_addr_copy(&null_mac_addr, &dev->data->mac_addrs[index]);
+
+	/* reset pool bitmap */
+	dev->data->mac_pool_sel[index] = 0;
+
+	return 0;
+}
+
+int
+rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	if (!is_valid_assigned_ether_addr(addr))
+		return -EINVAL;
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP);
+
+	/* Update default address in NIC data structure */
+	ether_addr_copy(addr, &dev->data->mac_addrs[0]);
+
+	(*dev->dev_ops->mac_addr_set)(dev, addr);
+
+	return 0;
+}
+
+int
+rte_eth_dev_set_vf_rxmode(uint8_t port_id,  uint16_t vf,
+				uint16_t rx_mode, uint8_t on)
+{
+	uint16_t num_vfs;
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	num_vfs = dev_info.max_vfs;
+	if (vf > num_vfs) {
+		RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf);
+		return -EINVAL;
+	}
+
+	if (rx_mode == 0) {
+		RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n");
+		return -EINVAL;
+	}
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP);
+	return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on);
+}
+
+/*
+ * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find
+ * an empty spot.
+ */
+static int
+get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
+{
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	unsigned i;
+
+	rte_eth_dev_info_get(port_id, &dev_info);
+	if (!dev->data->hash_mac_addrs)
+		return -1;
+
+	for (i = 0; i < dev_info.max_hash_mac_addrs; i++)
+		if (memcmp(addr, &dev->data->hash_mac_addrs[i],
+			ETHER_ADDR_LEN) == 0)
+			return i;
+
+	return -1;
+}
+
+int
+rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr,
+				uint8_t on)
+{
+	int index;
+	int ret;
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	if (is_zero_ether_addr(addr)) {
+		RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+			port_id);
+		return -EINVAL;
+	}
+
+	index = get_hash_mac_addr_index(port_id, addr);
+	/* Check if it's already there, and do nothing */
+	if ((index >= 0) && (on))
+		return 0;
+
+	if (index < 0) {
+		if (!on) {
+			RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not "
+				"set in UTA\n", port_id);
+			return -EINVAL;
+		}
+
+		index = get_hash_mac_addr_index(port_id, &null_mac_addr);
+		if (index < 0) {
+			RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
+					port_id);
+			return -ENOSPC;
+		}
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_hash_table_set, -ENOTSUP);
+	ret = (*dev->dev_ops->uc_hash_table_set)(dev, addr, on);
+	if (ret == 0) {
+		/* Update address in NIC data structure */
+		if (on)
+			ether_addr_copy(addr,
+					&dev->data->hash_mac_addrs[index]);
+		else
+			ether_addr_copy(&null_mac_addr,
+					&dev->data->hash_mac_addrs[index]);
+	}
+
+	return ret;
+}
+
+int
+rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_all_hash_table_set, -ENOTSUP);
+	return (*dev->dev_ops->uc_all_hash_table_set)(dev, on);
+}
+
+int
+rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on)
+{
+	uint16_t num_vfs;
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	num_vfs = dev_info.max_vfs;
+	if (vf > num_vfs) {
+		RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP);
+	return (*dev->dev_ops->set_vf_rx)(dev, vf, on);
+}
+
+int
+rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on)
+{
+	uint16_t num_vfs;
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	num_vfs = dev_info.max_vfs;
+	if (vf > num_vfs) {
+		RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP);
+	return (*dev->dev_ops->set_vf_tx)(dev, vf, on);
+}
+
+int
+rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id,
+			       uint64_t vf_mask, uint8_t vlan_on)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	if (vlan_id > ETHER_MAX_VLAN_ID) {
+		RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n",
+			vlan_id);
+		return -EINVAL;
+	}
+
+	if (vf_mask == 0) {
+		RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n");
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP);
+	return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id,
+						   vf_mask, vlan_on);
+}
+
+int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+					uint16_t tx_rate)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_link link;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+	link = dev->data->dev_link;
+
+	if (queue_idx > dev_info.max_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: "
+				"invalid queue id=%d\n", port_id, queue_idx);
+		return -EINVAL;
+	}
+
+	if (tx_rate > link.link_speed) {
+		RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, "
+				"bigger than link speed= %d\n",
+			tx_rate, link.link_speed);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP);
+	return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate);
+}
+
+int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate,
+				uint64_t q_msk)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_link link;
+
+	if (q_msk == 0)
+		return 0;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+	link = dev->data->dev_link;
+
+	if (vf > dev_info.max_vfs) {
+		RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: "
+				"invalid vf id=%d\n", port_id, vf);
+		return -EINVAL;
+	}
+
+	if (tx_rate > link.link_speed) {
+		RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, "
+				"bigger than link speed= %d\n",
+				tx_rate, link.link_speed);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP);
+	return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk);
+}
+
+int
+rte_eth_mirror_rule_set(uint8_t port_id,
+			struct rte_eth_mirror_conf *mirror_conf,
+			uint8_t rule_id, uint8_t on)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	if (mirror_conf->rule_type == 0) {
+		RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n");
+		return -EINVAL;
+	}
+
+	if (mirror_conf->dst_pool >= ETH_64_POOLS) {
+		RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n",
+				ETH_64_POOLS - 1);
+		return -EINVAL;
+	}
+
+	if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP |
+	     ETH_MIRROR_VIRTUAL_POOL_DOWN)) &&
+	    (mirror_conf->pool_mask == 0)) {
+		RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n");
+		return -EINVAL;
+	}
+
+	if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) &&
+	    mirror_conf->vlan.vlan_mask == 0) {
+		RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_set, -ENOTSUP);
+
+	return (*dev->dev_ops->mirror_rule_set)(dev, mirror_conf, rule_id, on);
+}
+
+int
+rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_reset, -ENOTSUP);
+
+	return (*dev->dev_ops->mirror_rule_reset)(dev, rule_id);
+}
+
+int
+rte_eth_dev_callback_register(uint8_t port_id,
+			enum rte_eth_event_type event,
+			rte_eth_dev_cb_fn cb_fn, void *cb_arg)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_callback *user_cb;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	rte_spinlock_lock(&rte_eth_dev_cb_lock);
+
+	TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) {
+		if (user_cb->cb_fn == cb_fn &&
+			user_cb->cb_arg == cb_arg &&
+			user_cb->event == event) {
+			break;
+		}
+	}
+
+	/* create a new callback. */
+	if (user_cb == NULL)
+		user_cb = rte_zmalloc("INTR_USER_CALLBACK",
+					sizeof(struct rte_eth_dev_callback), 0);
+	if (user_cb != NULL) {
+		user_cb->cb_fn = cb_fn;
+		user_cb->cb_arg = cb_arg;
+		user_cb->event = event;
+		TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next);
+	}
+
+	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
+	return (user_cb == NULL) ? -ENOMEM : 0;
+}
+
+int
+rte_eth_dev_callback_unregister(uint8_t port_id,
+			enum rte_eth_event_type event,
+			rte_eth_dev_cb_fn cb_fn, void *cb_arg)
+{
+	int ret;
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_callback *cb, *next;
+
+	if (!cb_fn)
+		return -EINVAL;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	rte_spinlock_lock(&rte_eth_dev_cb_lock);
+
+	ret = 0;
+	for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) {
+
+		next = TAILQ_NEXT(cb, next);
+
+		if (cb->cb_fn != cb_fn || cb->event != event ||
+				(cb->cb_arg != (void *)-1 &&
+				cb->cb_arg != cb_arg))
+			continue;
+
+		/*
+		 * if this callback is not executing right now,
+		 * then remove it.
+		 */
+		if (cb->active == 0) {
+			TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next);
+			rte_free(cb);
+		} else {
+			ret = -EAGAIN;
+		}
+	}
+
+	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
+	return ret;
+}
+
+void
+_rte_eth_dev_callback_process(struct rte_eth_dev *dev,
+	enum rte_eth_event_type event)
+{
+	struct rte_eth_dev_callback *cb_lst;
+	struct rte_eth_dev_callback dev_cb;
+
+	rte_spinlock_lock(&rte_eth_dev_cb_lock);
+	TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
+		if (cb_lst->cb_fn == NULL || cb_lst->event != event)
+			continue;
+		dev_cb = *cb_lst;
+		cb_lst->active = 1;
+		rte_spinlock_unlock(&rte_eth_dev_cb_lock);
+		dev_cb.cb_fn(dev->data->port_id, dev_cb.event,
+						dev_cb.cb_arg);
+		rte_spinlock_lock(&rte_eth_dev_cb_lock);
+		cb_lst->active = 0;
+	}
+	rte_spinlock_unlock(&rte_eth_dev_cb_lock);
+}
+
+int
+rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	uint16_t qid;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	for (qid = 0; qid < dev->data->nb_rx_queues; qid++) {
+		vec = intr_handle->intr_vec[qid];
+		rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
+		if (rc && rc != -EEXIST) {
+			RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
+					" op %d epfd %d vec %u\n",
+					port_id, qid, op, epfd, vec);
+		}
+	}
+
+	return 0;
+}
+
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
+			 uint16_t queue_id, size_t size, unsigned align,
+			 int socket_id)
+{
+	char z_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+
+	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
+		 dev->driver->pci_drv.name, ring_name,
+		 dev->data->port_id, queue_id);
+
+	mz = rte_memzone_lookup(z_name);
+	if (mz)
+		return mz;
+
+	if (rte_xen_dom0_supported())
+		return rte_memzone_reserve_bounded(z_name, size, socket_id,
+						   0, align, RTE_PGSIZE_2M);
+	else
+		return rte_memzone_reserve_aligned(z_name, size, socket_id,
+						   0, align);
+}
+
+int
+rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			  int epfd, int op, void *data)
+{
+	uint32_t vec;
+	struct rte_eth_dev *dev;
+	struct rte_intr_handle *intr_handle;
+	int rc;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id);
+		return -EINVAL;
+	}
+
+	intr_handle = &dev->pci_dev->intr_handle;
+	if (!intr_handle->intr_vec) {
+		RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+		return -EPERM;
+	}
+
+	vec = intr_handle->intr_vec[queue_id];
+	rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
+	if (rc && rc != -EEXIST) {
+		RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
+				" op %d epfd %d vec %u\n",
+				port_id, queue_id, op, epfd, vec);
+		return rc;
+	}
+
+	return 0;
+}
+
+int
+rte_eth_dev_rx_intr_enable(uint8_t port_id,
+			   uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id);
+}
+
+int
+rte_eth_dev_rx_intr_disable(uint8_t port_id,
+			    uint16_t queue_id)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP);
+	return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
+}
+
+#ifdef RTE_NIC_BYPASS
+int rte_eth_dev_bypass_init(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_init, -ENOTSUP);
+	(*dev->dev_ops->bypass_init)(dev);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP);
+	(*dev->dev_ops->bypass_state_show)(dev, state);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_set, -ENOTSUP);
+	(*dev->dev_ops->bypass_state_set)(dev, new_state);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP);
+	(*dev->dev_ops->bypass_event_show)(dev, event, state);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_event_set, -ENOTSUP);
+	(*dev->dev_ops->bypass_event_set)(dev, event, state);
+	return 0;
+}
+
+int
+rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_set, -ENOTSUP);
+	(*dev->dev_ops->bypass_wd_timeout_set)(dev, timeout);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_ver_show, -ENOTSUP);
+	(*dev->dev_ops->bypass_ver_show)(dev, ver);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_show, -ENOTSUP);
+	(*dev->dev_ops->bypass_wd_timeout_show)(dev, wd_timeout);
+	return 0;
+}
+
+int
+rte_eth_dev_bypass_wd_reset(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_reset, -ENOTSUP);
+	(*dev->dev_ops->bypass_wd_reset)(dev);
+	return 0;
+}
+#endif
+
+int
+rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
+	return (*dev->dev_ops->filter_ctrl)(dev, filter_type,
+				RTE_ETH_FILTER_NOP, NULL);
+}
+
+int
+rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
+		       enum rte_filter_op filter_op, void *arg)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP);
+	return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg);
+}
+
+void *
+rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_rx_callback_fn fn, void *user_param)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	rte_errno = ENOTSUP;
+	return NULL;
+#endif
+	/* check input parameters */
+	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
+		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+	if (cb == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	cb->fn.rx = fn;
+	cb->param = user_param;
+
+	/* Add the callbacks in fifo order. */
+	struct rte_eth_rxtx_callback *tail =
+		rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+
+	if (!tail) {
+		rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+
+	} else {
+		while (tail->next)
+			tail = tail->next;
+		tail->next = cb;
+	}
+
+	return cb;
+}
+
+void *
+rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_tx_callback_fn fn, void *user_param)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	rte_errno = ENOTSUP;
+	return NULL;
+#endif
+	/* check input parameters */
+	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
+		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+	if (cb == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	cb->fn.tx = fn;
+	cb->param = user_param;
+
+	/* Add the callbacks in fifo order. */
+	struct rte_eth_rxtx_callback *tail =
+		rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
+
+	if (!tail) {
+		rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb;
+
+	} else {
+		while (tail->next)
+			tail = tail->next;
+		tail->next = cb;
+	}
+
+	return cb;
+}
+
+int
+rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	return -ENOTSUP;
+#endif
+	/* Check input parameters. */
+	if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL ||
+		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		return -EINVAL;
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+	struct rte_eth_rxtx_callback *prev_cb;
+
+	/* Reset head pointer and remove user cb if first in the list. */
+	if (cb == user_cb) {
+		dev->post_rx_burst_cbs[queue_id] = user_cb->next;
+		return 0;
+	}
+
+	/* Remove the user cb from the callback list. */
+	do {
+		prev_cb = cb;
+		cb = cb->next;
+
+		if (cb == user_cb) {
+			prev_cb->next = user_cb->next;
+			return 0;
+		}
+
+	} while (cb != NULL);
+
+	/* Callback wasn't found. */
+	return -EINVAL;
+}
+
+int
+rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	return -ENOTSUP;
+#endif
+	/* Check input parameters. */
+	if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL ||
+		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+		return -EINVAL;
+	}
+
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+	struct rte_eth_rxtx_callback *prev_cb;
+
+	/* Reset head pointer and remove user cb if first in the list. */
+	if (cb == user_cb) {
+		dev->pre_tx_burst_cbs[queue_id] = user_cb->next;
+		return 0;
+	}
+
+	/* Remove the user cb from the callback list. */
+	do {
+		prev_cb = cb;
+		cb = cb->next;
+
+		if (cb == user_cb) {
+			prev_cb->next = user_cb->next;
+			return 0;
+		}
+
+	} while (cb != NULL);
+
+	/* Callback wasn't found. */
+	return -EINVAL;
+}
+
+int
+rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+	struct rte_eth_rxq_info *qinfo)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	if (qinfo == NULL)
+		return -EINVAL;
+
+	dev = &rte_eth_devices[port_id];
+	if (queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP);
+
+	memset(qinfo, 0, sizeof(*qinfo));
+	dev->dev_ops->rxq_info_get(dev, queue_id, qinfo);
+	return 0;
+}
+
+int
+rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+	struct rte_eth_txq_info *qinfo)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	if (qinfo == NULL)
+		return -EINVAL;
+
+	dev = &rte_eth_devices[port_id];
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+		return -EINVAL;
+	}
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP);
+
+	memset(qinfo, 0, sizeof(*qinfo));
+	dev->dev_ops->txq_info_get(dev, queue_id, qinfo);
+	return 0;
+}
+
+int
+rte_eth_dev_set_mc_addr_list(uint8_t port_id,
+			     struct ether_addr *mc_addr_set,
+			     uint32_t nb_mc_addr)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_mc_addr_list, -ENOTSUP);
+	return dev->dev_ops->set_mc_addr_list(dev, mc_addr_set, nb_mc_addr);
+}
+
+int
+rte_eth_timesync_enable(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_enable, -ENOTSUP);
+	return (*dev->dev_ops->timesync_enable)(dev);
+}
+
+int
+rte_eth_timesync_disable(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_disable, -ENOTSUP);
+	return (*dev->dev_ops->timesync_disable)(dev);
+}
+
+int
+rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp,
+				   uint32_t flags)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_rx_timestamp, -ENOTSUP);
+	return (*dev->dev_ops->timesync_read_rx_timestamp)(dev, timestamp, flags);
+}
+
+int
+rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec *timestamp)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_tx_timestamp, -ENOTSUP);
+	return (*dev->dev_ops->timesync_read_tx_timestamp)(dev, timestamp);
+}
+
+int
+rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_adjust_time, -ENOTSUP);
+	return (*dev->dev_ops->timesync_adjust_time)(dev, delta);
+}
+
+int
+rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_time, -ENOTSUP);
+	return (*dev->dev_ops->timesync_read_time)(dev, timestamp);
+}
+
+int
+rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_write_time, -ENOTSUP);
+	return (*dev->dev_ops->timesync_write_time)(dev, timestamp);
+}
+
+int
+rte_eth_dev_get_reg_length(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg_length, -ENOTSUP);
+	return (*dev->dev_ops->get_reg_length)(dev);
+}
+
+int
+rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP);
+	return (*dev->dev_ops->get_reg)(dev, info);
+}
+
+int
+rte_eth_dev_get_eeprom_length(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom_length, -ENOTSUP);
+	return (*dev->dev_ops->get_eeprom_length)(dev);
+}
+
+int
+rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP);
+	return (*dev->dev_ops->get_eeprom)(dev, info);
+}
+
+int
+rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP);
+	return (*dev->dev_ops->set_eeprom)(dev, info);
+}
+
+int
+rte_eth_dev_get_dcb_info(uint8_t port_id,
+			     struct rte_eth_dcb_info *dcb_info)
+{
+	struct rte_eth_dev *dev;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info));
+
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_dcb_info, -ENOTSUP);
+	return (*dev->dev_ops->get_dcb_info)(dev, dcb_info);
+}
+
+void
+rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_dev)
+{
+	if ((eth_dev == NULL) || (pci_dev == NULL)) {
+		RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n",
+				eth_dev, pci_dev);
+		return;
+	}
+
+	eth_dev->data->dev_flags = 0;
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE)
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+
+	eth_dev->data->kdrv = pci_dev->kdrv;
+	eth_dev->data->numa_node = pci_dev->numa_node;
+	eth_dev->data->drv_name = pci_dev->driver->name;
+}
+
+int
+rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id,
+				    struct rte_eth_l2_tunnel_conf *l2_tunnel)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	if (l2_tunnel == NULL) {
+		RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+		return -EINVAL;
+	}
+
+	if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
+		RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_eth_type_conf,
+				-ENOTSUP);
+	return (*dev->dev_ops->l2_tunnel_eth_type_conf)(dev, l2_tunnel);
+}
+
+int
+rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
+				  struct rte_eth_l2_tunnel_conf *l2_tunnel,
+				  uint32_t mask,
+				  uint8_t en)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+	if (l2_tunnel == NULL) {
+		RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+		return -EINVAL;
+	}
+
+	if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
+		RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n");
+		return -EINVAL;
+	}
+
+	if (mask == 0) {
+		RTE_PMD_DEBUG_TRACE("Mask should have a value.\n");
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_offload_set,
+				-ENOTSUP);
+	return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
+}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
new file mode 100644
index 00000000..022733ec
--- /dev/null
+++ b/lib/librte_ether/rte_ethdev.h
@@ -0,0 +1,4286 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHDEV_H_
+#define _RTE_ETHDEV_H_
+
+/**
+ * @file
+ *
+ * RTE Ethernet Device API
+ *
+ * The Ethernet Device API is composed of two parts:
+ *
+ * - The application-oriented Ethernet API that includes functions to setup
+ *   an Ethernet device (configure it, setup its RX and TX queues and start it),
+ *   to get its MAC address, the speed and the status of its physical link,
+ *   to receive and to transmit packets, and so on.
+ *
+ * - The driver-oriented Ethernet API that exports a function allowing
+ *   an Ethernet Poll Mode Driver (PMD) to simultaneously register itself as
+ *   an Ethernet device driver and as a PCI driver for a set of matching PCI
+ *   [Ethernet] devices classes.
+ *
+ * By default, all the functions of the Ethernet Device API exported by a PMD
+ * are lock-free functions which assume to not be invoked in parallel on
+ * different logical cores to work on the same target object.  For instance,
+ * the receive function of a PMD cannot be invoked in parallel on two logical
+ * cores to poll the same RX queue [of the same port]. Of course, this function
+ * can be invoked in parallel by different logical cores on different RX queues.
+ * It is the responsibility of the upper level application to enforce this rule.
+ *
+ * If needed, parallel accesses by multiple logical cores to shared queues
+ * shall be explicitly protected by dedicated inline lock-aware functions
+ * built on top of their corresponding lock-free functions of the PMD API.
+ *
+ * In all functions of the Ethernet API, the Ethernet device is
+ * designated by an integer >= 0 named the device port identifier.
+ *
+ * At the Ethernet driver level, Ethernet devices are represented by a generic
+ * data structure of type *rte_eth_dev*.
+ *
+ * Ethernet devices are dynamically registered during the PCI probing phase
+ * performed at EAL initialization time.
+ * When an Ethernet device is being probed, an *rte_eth_dev* structure and
+ * a new port identifier are allocated for that device. Then, the eth_dev_init()
+ * function supplied by the Ethernet driver matching the probed PCI
+ * device is invoked to properly initialize the device.
+ *
+ * The role of the device init function consists of resetting the hardware,
+ * checking access to Non-volatile Memory (NVM), reading the MAC address
+ * from NVM etc.
+ *
+ * If the device init operation is successful, the correspondence between
+ * the port identifier assigned to the new device and its associated
+ * *rte_eth_dev* structure is effectively registered.
+ * Otherwise, both the *rte_eth_dev* structure and the port identifier are
+ * freed.
+ *
+ * The functions exported by the application Ethernet API to setup a device
+ * designated by its port identifier must be invoked in the following order:
+ *     - rte_eth_dev_configure()
+ *     - rte_eth_tx_queue_setup()
+ *     - rte_eth_rx_queue_setup()
+ *     - rte_eth_dev_start()
+ *
+ * Then, the network application can invoke, in any order, the functions
+ * exported by the Ethernet API to get the MAC address of a given device, to
+ * get the speed and the status of a device physical link, to receive/transmit
+ * [burst of] packets, and so on.
+ *
+ * If the application wants to change the configuration (i.e. call
+ * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or
+ * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the
+ * device and then do the reconfiguration before calling rte_eth_dev_start()
+ * again. The tramsit and receive functions should not be invoked when the
+ * device is stopped.
+ *
+ * Please note that some configuration is not stored between calls to
+ * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will
+ * be retained:
+ *
+ *     - flow control settings
+ *     - receive mode configuration (promiscuous mode, hardware checksum mode,
+ *       RSS/VMDQ settings etc.)
+ *     - VLAN filtering configuration
+ *     - MAC addresses supplied to MAC address array
+ *     - flow director filtering mode (but not filtering rules)
+ *     - NIC queue statistics mappings
+ *
+ * Any other configuration will not be stored and will need to be re-entered
+ * after a call to rte_eth_dev_start().
+ *
+ * Finally, a network application can close an Ethernet device by invoking the
+ * rte_eth_dev_close() function.
+ *
+ * Each function of the application Ethernet API invokes a specific function
+ * of the PMD that controls the target device designated by its port
+ * identifier.
+ * For this purpose, all device-specific functions of an Ethernet driver are
+ * supplied through a set of pointers contained in a generic structure of type
+ * *eth_dev_ops*.
+ * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev*
+ * structure by the device init function of the Ethernet driver, which is
+ * invoked during the PCI probing phase, as explained earlier.
+ *
+ * In other words, each function of the Ethernet API simply retrieves the
+ * *rte_eth_dev* structure associated with the device port identifier and
+ * performs an indirect invocation of the corresponding driver function
+ * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure.
+ *
+ * For performance reasons, the address of the burst-oriented RX and TX
+ * functions of the Ethernet driver are not contained in the *eth_dev_ops*
+ * structure. Instead, they are directly stored at the beginning of the
+ * *rte_eth_dev* structure to avoid an extra indirect memory access during
+ * their invocation.
+ *
+ * RTE ethernet device drivers do not use interrupts for transmitting or
+ * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit
+ * functions to applications.
+ * Both receive and transmit functions are packet-burst oriented to minimize
+ * their cost per packet through the following optimizations:
+ *
+ * - Sharing among multiple packets the incompressible cost of the
+ *   invocation of receive/transmit functions.
+ *
+ * - Enabling receive/transmit functions to take advantage of burst-oriented
+ *   hardware features (L1 cache, prefetch instructions, NIC head/tail
+ *   registers) to minimize the number of CPU cycles per packet, for instance,
+ *   by avoiding useless read memory accesses to ring descriptors, or by
+ *   systematically using arrays of pointers that exactly fit L1 cache line
+ *   boundaries and sizes.
+ *
+ * The burst-oriented receive function does not provide any error notification,
+ * to avoid the corresponding overhead. As a hint, the upper-level application
+ * might check the status of the device link once being systematically returned
+ * a 0 value by the receive function of the driver for a given number of tries.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_dev.h>
+
+/* Use this macro to check if LRO API is supported */
+#define RTE_ETHDEV_HAS_LRO_SUPPORT
+
+#include <rte_log.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include "rte_ether.h"
+#include "rte_eth_ctrl.h"
+#include "rte_dev_info.h"
+
+struct rte_mbuf;
+
+/**
+ * A structure used to retrieve statistics for an Ethernet port.
+ */
+struct rte_eth_stats {
+	uint64_t ipackets;  /**< Total number of successfully received packets. */
+	uint64_t opackets;  /**< Total number of successfully transmitted packets.*/
+	uint64_t ibytes;    /**< Total number of successfully received bytes. */
+	uint64_t obytes;    /**< Total number of successfully transmitted bytes. */
+	uint64_t imissed;
+	/**< Total of RX packets dropped by the HW,
+	 * because there are no available mbufs (i.e. RX queues are full).
+	 */
+	uint64_t ibadcrc __rte_deprecated;
+	/**< Deprecated; Total of RX packets with CRC error. */
+	uint64_t ibadlen __rte_deprecated;
+	/**< Deprecated; Total of RX packets with bad length. */
+	uint64_t ierrors;   /**< Total number of erroneous received packets. */
+	uint64_t oerrors;   /**< Total number of failed transmitted packets. */
+	uint64_t imcasts;
+	/**< Deprecated; Total number of multicast received packets. */
+	uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */
+	uint64_t fdirmatch __rte_deprecated;
+	/**< Deprecated; Total number of RX packets matching a filter. */
+	uint64_t fdirmiss __rte_deprecated;
+	/**< Deprecated; Total number of RX packets not matching any filter. */
+	uint64_t tx_pause_xon __rte_deprecated;
+	 /**< Deprecated; Total nb. of XON pause frame sent. */
+	uint64_t rx_pause_xon __rte_deprecated;
+	/**< Deprecated; Total nb. of XON pause frame received. */
+	uint64_t tx_pause_xoff __rte_deprecated;
+	/**< Deprecated; Total nb. of XOFF pause frame sent. */
+	uint64_t rx_pause_xoff __rte_deprecated;
+	/**< Deprecated; Total nb. of XOFF pause frame received. */
+	uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of queue RX packets. */
+	uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of queue TX packets. */
+	uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of successfully received queue bytes. */
+	uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of successfully transmitted queue bytes. */
+	uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+	/**< Total number of queue packets received that are dropped. */
+	uint64_t ilbpackets;
+	/**< Total number of good packets received from loopback,VF Only */
+	uint64_t olbpackets;
+	/**< Total number of good packets transmitted to loopback,VF Only */
+	uint64_t ilbbytes;
+	/**< Total number of good bytes received from loopback,VF Only */
+	uint64_t olbbytes;
+	/**< Total number of good bytes transmitted to loopback,VF Only */
+};
+
+/**
+ * Device supported speeds bitmap flags
+ */
+#define ETH_LINK_SPEED_AUTONEG  (0 <<  0)  /**< Autonegotiate (all speeds) */
+#define ETH_LINK_SPEED_FIXED    (1 <<  0)  /**< Disable autoneg (fixed speed) */
+#define ETH_LINK_SPEED_10M_HD   (1 <<  1)  /**<  10 Mbps half-duplex */
+#define ETH_LINK_SPEED_10M      (1 <<  2)  /**<  10 Mbps full-duplex */
+#define ETH_LINK_SPEED_100M_HD  (1 <<  3)  /**< 100 Mbps half-duplex */
+#define ETH_LINK_SPEED_100M     (1 <<  4)  /**< 100 Mbps full-duplex */
+#define ETH_LINK_SPEED_1G       (1 <<  5)  /**<   1 Gbps */
+#define ETH_LINK_SPEED_2_5G     (1 <<  6)  /**< 2.5 Gbps */
+#define ETH_LINK_SPEED_5G       (1 <<  7)  /**<   5 Gbps */
+#define ETH_LINK_SPEED_10G      (1 <<  8)  /**<  10 Gbps */
+#define ETH_LINK_SPEED_20G      (1 <<  9)  /**<  20 Gbps */
+#define ETH_LINK_SPEED_25G      (1 << 10)  /**<  25 Gbps */
+#define ETH_LINK_SPEED_40G      (1 << 11)  /**<  40 Gbps */
+#define ETH_LINK_SPEED_50G      (1 << 12)  /**<  50 Gbps */
+#define ETH_LINK_SPEED_56G      (1 << 13)  /**<  56 Gbps */
+#define ETH_LINK_SPEED_100G     (1 << 14)  /**< 100 Gbps */
+
+/**
+ * Ethernet numeric link speeds in Mbps
+ */
+#define ETH_SPEED_NUM_NONE         0 /**< Not defined */
+#define ETH_SPEED_NUM_10M         10 /**<  10 Mbps */
+#define ETH_SPEED_NUM_100M       100 /**< 100 Mbps */
+#define ETH_SPEED_NUM_1G        1000 /**<   1 Gbps */
+#define ETH_SPEED_NUM_2_5G      2500 /**< 2.5 Gbps */
+#define ETH_SPEED_NUM_5G        5000 /**<   5 Gbps */
+#define ETH_SPEED_NUM_10G      10000 /**<  10 Gbps */
+#define ETH_SPEED_NUM_20G      20000 /**<  20 Gbps */
+#define ETH_SPEED_NUM_25G      25000 /**<  25 Gbps */
+#define ETH_SPEED_NUM_40G      40000 /**<  40 Gbps */
+#define ETH_SPEED_NUM_50G      50000 /**<  50 Gbps */
+#define ETH_SPEED_NUM_56G      56000 /**<  56 Gbps */
+#define ETH_SPEED_NUM_100G    100000 /**< 100 Gbps */
+
+/**
+ * A structure used to retrieve link-level information of an Ethernet port.
+ */
+struct rte_eth_link {
+	uint32_t link_speed;        /**< ETH_SPEED_NUM_ */
+	uint16_t link_duplex  : 1;  /**< ETH_LINK_[HALF/FULL]_DUPLEX */
+	uint16_t link_autoneg : 1;  /**< ETH_LINK_SPEED_[AUTONEG/FIXED] */
+	uint16_t link_status  : 1;  /**< ETH_LINK_[DOWN/UP] */
+} __attribute__((aligned(8)));      /**< aligned for atomic64 read/write */
+
+/* Utility constants */
+#define ETH_LINK_HALF_DUPLEX    0 /**< Half-duplex connection. */
+#define ETH_LINK_FULL_DUPLEX    1 /**< Full-duplex connection. */
+#define ETH_LINK_DOWN           0 /**< Link is down. */
+#define ETH_LINK_UP             1 /**< Link is up. */
+#define ETH_LINK_FIXED          0 /**< No autonegotiation. */
+#define ETH_LINK_AUTONEG        1 /**< Autonegotiated. */
+
+/**
+ * A structure used to configure the ring threshold registers of an RX/TX
+ * queue for an Ethernet port.
+ */
+struct rte_eth_thresh {
+	uint8_t pthresh; /**< Ring prefetch threshold. */
+	uint8_t hthresh; /**< Ring host threshold. */
+	uint8_t wthresh; /**< Ring writeback threshold. */
+};
+
+/**
+ *  Simple flags are used for rte_eth_conf.rxmode.mq_mode.
+ */
+#define ETH_MQ_RX_RSS_FLAG  0x1
+#define ETH_MQ_RX_DCB_FLAG  0x2
+#define ETH_MQ_RX_VMDQ_FLAG 0x4
+
+/**
+ *  A set of values to identify what method is to be used to route
+ *  packets to multiple queues.
+ */
+enum rte_eth_rx_mq_mode {
+	/** None of DCB,RSS or VMDQ mode */
+	ETH_MQ_RX_NONE = 0,
+
+	/** For RX side, only RSS is on */
+	ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG,
+	/** For RX side,only DCB is on. */
+	ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG,
+	/** Both DCB and RSS enable */
+	ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG,
+
+	/** Only VMDQ, no RSS nor DCB */
+	ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG,
+	/** RSS mode with VMDQ */
+	ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG,
+	/** Use VMDQ+DCB to route traffic to queues */
+	ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG,
+	/** Enable both VMDQ and DCB in VMDq */
+	ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG |
+				 ETH_MQ_RX_VMDQ_FLAG,
+};
+
+/**
+ * for rx mq mode backward compatible
+ */
+#define ETH_RSS                       ETH_MQ_RX_RSS
+#define VMDQ_DCB                      ETH_MQ_RX_VMDQ_DCB
+#define ETH_DCB_RX                    ETH_MQ_RX_DCB
+
+/**
+ * A set of values to identify what method is to be used to transmit
+ * packets using multi-TCs.
+ */
+enum rte_eth_tx_mq_mode {
+	ETH_MQ_TX_NONE    = 0,  /**< It is in neither DCB nor VT mode. */
+	ETH_MQ_TX_DCB,          /**< For TX side,only DCB is on. */
+	ETH_MQ_TX_VMDQ_DCB,	/**< For TX side,both DCB and VT is on. */
+	ETH_MQ_TX_VMDQ_ONLY,    /**< Only VT on, no DCB */
+};
+
+/**
+ * for tx mq mode backward compatible
+ */
+#define ETH_DCB_NONE                ETH_MQ_TX_NONE
+#define ETH_VMDQ_DCB_TX             ETH_MQ_TX_VMDQ_DCB
+#define ETH_DCB_TX                  ETH_MQ_TX_DCB
+
+/**
+ * A structure used to configure the RX features of an Ethernet port.
+ */
+struct rte_eth_rxmode {
+	/** The multi-queue packet distribution mode to be used, e.g. RSS. */
+	enum rte_eth_rx_mq_mode mq_mode;
+	uint32_t max_rx_pkt_len;  /**< Only used if jumbo_frame enabled. */
+	uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/
+	uint16_t header_split : 1, /**< Header Split enable. */
+		hw_ip_checksum   : 1, /**< IP/UDP/TCP checksum offload enable. */
+		hw_vlan_filter   : 1, /**< VLAN filter enable. */
+		hw_vlan_strip    : 1, /**< VLAN strip enable. */
+		hw_vlan_extend   : 1, /**< Extended VLAN enable. */
+		jumbo_frame      : 1, /**< Jumbo Frame Receipt enable. */
+		hw_strip_crc     : 1, /**< Enable CRC stripping by hardware. */
+		enable_scatter   : 1, /**< Enable scatter packets rx handler */
+		enable_lro       : 1; /**< Enable LRO */
+};
+
+/**
+ * VLAN types to indicate if it is for single VLAN, inner VLAN or outer VLAN.
+ * Note that single VLAN is treated the same as inner VLAN.
+ */
+enum rte_vlan_type {
+	ETH_VLAN_TYPE_UNKNOWN = 0,
+	ETH_VLAN_TYPE_INNER, /**< Single VLAN, or inner VLAN. */
+	ETH_VLAN_TYPE_OUTER, /**< Outer VLAN. */
+	ETH_VLAN_TYPE_MAX,
+};
+
+/**
+ * A structure used to configure the Receive Side Scaling (RSS) feature
+ * of an Ethernet port.
+ * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
+ * to an array holding the RSS key to use for hashing specific header
+ * fields of received packets. The length of this array should be indicated
+ * by *rss_key_len* below. Otherwise, a default random hash key is used by
+ * the device driver.
+ *
+ * The *rss_key_len* field of the *rss_conf* structure indicates the length
+ * in bytes of the array pointed by *rss_key*. To be compatible, this length
+ * will be checked in i40e only. Others assume 40 bytes to be used as before.
+ *
+ * The *rss_hf* field of the *rss_conf* structure indicates the different
+ * types of IPv4/IPv6 packets to which the RSS hashing must be applied.
+ * Supplying an *rss_hf* equal to zero disables the RSS feature.
+ */
+struct rte_eth_rss_conf {
+	uint8_t *rss_key;    /**< If not NULL, 40-byte hash key. */
+	uint8_t rss_key_len; /**< hash key length in bytes. */
+	uint64_t rss_hf;     /**< Hash functions to apply - see below. */
+};
+
+/*
+ * The RSS offload types are defined based on flow types which are defined
+ * in rte_eth_ctrl.h. Different NIC hardwares may support different RSS offload
+ * types. The supported flow types or RSS offload types can be queried by
+ * rte_eth_dev_info_get().
+ */
+#define ETH_RSS_IPV4               (1ULL << RTE_ETH_FLOW_IPV4)
+#define ETH_RSS_FRAG_IPV4          (1ULL << RTE_ETH_FLOW_FRAG_IPV4)
+#define ETH_RSS_NONFRAG_IPV4_TCP   (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_TCP)
+#define ETH_RSS_NONFRAG_IPV4_UDP   (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_UDP)
+#define ETH_RSS_NONFRAG_IPV4_SCTP  (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP)
+#define ETH_RSS_NONFRAG_IPV4_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER)
+#define ETH_RSS_IPV6               (1ULL << RTE_ETH_FLOW_IPV6)
+#define ETH_RSS_FRAG_IPV6          (1ULL << RTE_ETH_FLOW_FRAG_IPV6)
+#define ETH_RSS_NONFRAG_IPV6_TCP   (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_TCP)
+#define ETH_RSS_NONFRAG_IPV6_UDP   (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_UDP)
+#define ETH_RSS_NONFRAG_IPV6_SCTP  (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP)
+#define ETH_RSS_NONFRAG_IPV6_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER)
+#define ETH_RSS_L2_PAYLOAD         (1ULL << RTE_ETH_FLOW_L2_PAYLOAD)
+#define ETH_RSS_IPV6_EX            (1ULL << RTE_ETH_FLOW_IPV6_EX)
+#define ETH_RSS_IPV6_TCP_EX        (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX)
+#define ETH_RSS_IPV6_UDP_EX        (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX)
+
+#define ETH_RSS_IP ( \
+	ETH_RSS_IPV4 | \
+	ETH_RSS_FRAG_IPV4 | \
+	ETH_RSS_NONFRAG_IPV4_OTHER | \
+	ETH_RSS_IPV6 | \
+	ETH_RSS_FRAG_IPV6 | \
+	ETH_RSS_NONFRAG_IPV6_OTHER | \
+	ETH_RSS_IPV6_EX)
+
+#define ETH_RSS_UDP ( \
+	ETH_RSS_NONFRAG_IPV4_UDP | \
+	ETH_RSS_NONFRAG_IPV6_UDP | \
+	ETH_RSS_IPV6_UDP_EX)
+
+#define ETH_RSS_TCP ( \
+	ETH_RSS_NONFRAG_IPV4_TCP | \
+	ETH_RSS_NONFRAG_IPV6_TCP | \
+	ETH_RSS_IPV6_TCP_EX)
+
+#define ETH_RSS_SCTP ( \
+	ETH_RSS_NONFRAG_IPV4_SCTP | \
+	ETH_RSS_NONFRAG_IPV6_SCTP)
+
+/**< Mask of valid RSS hash protocols */
+#define ETH_RSS_PROTO_MASK ( \
+	ETH_RSS_IPV4 | \
+	ETH_RSS_FRAG_IPV4 | \
+	ETH_RSS_NONFRAG_IPV4_TCP | \
+	ETH_RSS_NONFRAG_IPV4_UDP | \
+	ETH_RSS_NONFRAG_IPV4_SCTP | \
+	ETH_RSS_NONFRAG_IPV4_OTHER | \
+	ETH_RSS_IPV6 | \
+	ETH_RSS_FRAG_IPV6 | \
+	ETH_RSS_NONFRAG_IPV6_TCP | \
+	ETH_RSS_NONFRAG_IPV6_UDP | \
+	ETH_RSS_NONFRAG_IPV6_SCTP | \
+	ETH_RSS_NONFRAG_IPV6_OTHER | \
+	ETH_RSS_L2_PAYLOAD | \
+	ETH_RSS_IPV6_EX | \
+	ETH_RSS_IPV6_TCP_EX | \
+	ETH_RSS_IPV6_UDP_EX)
+
+/*
+ * Definitions used for redirection table entry size.
+ * Some RSS RETA sizes may not be supported by some drivers, check the
+ * documentation or the description of relevant functions for more details.
+ */
+#define ETH_RSS_RETA_SIZE_64  64
+#define ETH_RSS_RETA_SIZE_128 128
+#define ETH_RSS_RETA_SIZE_512 512
+#define RTE_RETA_GROUP_SIZE   64
+
+/* Definitions used for VMDQ and DCB functionality */
+#define ETH_VMDQ_MAX_VLAN_FILTERS   64 /**< Maximum nb. of VMDQ vlan filters. */
+#define ETH_DCB_NUM_USER_PRIORITIES 8  /**< Maximum nb. of DCB priorities. */
+#define ETH_VMDQ_DCB_NUM_QUEUES     128 /**< Maximum nb. of VMDQ DCB queues. */
+#define ETH_DCB_NUM_QUEUES          128 /**< Maximum nb. of DCB queues. */
+
+/* DCB capability defines */
+#define ETH_DCB_PG_SUPPORT      0x00000001 /**< Priority Group(ETS) support. */
+#define ETH_DCB_PFC_SUPPORT     0x00000002 /**< Priority Flow Control support. */
+
+/* Definitions used for VLAN Offload functionality */
+#define ETH_VLAN_STRIP_OFFLOAD   0x0001 /**< VLAN Strip  On/Off */
+#define ETH_VLAN_FILTER_OFFLOAD  0x0002 /**< VLAN Filter On/Off */
+#define ETH_VLAN_EXTEND_OFFLOAD  0x0004 /**< VLAN Extend On/Off */
+
+/* Definitions used for mask VLAN setting */
+#define ETH_VLAN_STRIP_MASK   0x0001 /**< VLAN Strip  setting mask */
+#define ETH_VLAN_FILTER_MASK  0x0002 /**< VLAN Filter  setting mask*/
+#define ETH_VLAN_EXTEND_MASK  0x0004 /**< VLAN Extend  setting mask*/
+#define ETH_VLAN_ID_MAX       0x0FFF /**< VLAN ID is in lower 12 bits*/
+
+/* Definitions used for receive MAC address   */
+#define ETH_NUM_RECEIVE_MAC_ADDR  128 /**< Maximum nb. of receive mac addr. */
+
+/* Definitions used for unicast hash  */
+#define ETH_VMDQ_NUM_UC_HASH_ARRAY  128 /**< Maximum nb. of UC hash array. */
+
+/* Definitions used for VMDQ pool rx mode setting */
+#define ETH_VMDQ_ACCEPT_UNTAG   0x0001 /**< accept untagged packets. */
+#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table . */
+#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */
+#define ETH_VMDQ_ACCEPT_BROADCAST   0x0008 /**< accept broadcast packets. */
+#define ETH_VMDQ_ACCEPT_MULTICAST   0x0010 /**< multicast promiscuous. */
+
+/** Maximum nb. of vlan per mirror rule */
+#define ETH_MIRROR_MAX_VLANS       64
+
+#define ETH_MIRROR_VIRTUAL_POOL_UP     0x01  /**< Virtual Pool uplink Mirroring. */
+#define ETH_MIRROR_UPLINK_PORT         0x02  /**< Uplink Port Mirroring. */
+#define ETH_MIRROR_DOWNLINK_PORT       0x04  /**< Downlink Port Mirroring. */
+#define ETH_MIRROR_VLAN                0x08  /**< VLAN Mirroring. */
+#define ETH_MIRROR_VIRTUAL_POOL_DOWN   0x10  /**< Virtual Pool downlink Mirroring. */
+
+/**
+ * A structure used to configure VLAN traffic mirror of an Ethernet port.
+ */
+struct rte_eth_vlan_mirror {
+	uint64_t vlan_mask; /**< mask for valid VLAN ID. */
+	/** VLAN ID list for vlan mirroring. */
+	uint16_t vlan_id[ETH_MIRROR_MAX_VLANS];
+};
+
+/**
+ * A structure used to configure traffic mirror of an Ethernet port.
+ */
+struct rte_eth_mirror_conf {
+	uint8_t rule_type; /**< Mirroring rule type */
+	uint8_t dst_pool;  /**< Destination pool for this mirror rule. */
+	uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */
+	/** VLAN ID setting for VLAN mirroring. */
+	struct rte_eth_vlan_mirror vlan;
+};
+
+/**
+ * A structure used to configure 64 entries of Redirection Table of the
+ * Receive Side Scaling (RSS) feature of an Ethernet port. To configure
+ * more than 64 entries supported by hardware, an array of this structure
+ * is needed.
+ */
+struct rte_eth_rss_reta_entry64 {
+	uint64_t mask;
+	/**< Mask bits indicate which entries need to be updated/queried. */
+	uint16_t reta[RTE_RETA_GROUP_SIZE];
+	/**< Group of 64 redirection table entries. */
+};
+
+/**
+ * This enum indicates the possible number of traffic classes
+ * in DCB configratioins
+ */
+enum rte_eth_nb_tcs {
+	ETH_4_TCS = 4, /**< 4 TCs with DCB. */
+	ETH_8_TCS = 8  /**< 8 TCs with DCB. */
+};
+
+/**
+ * This enum indicates the possible number of queue pools
+ * in VMDQ configurations.
+ */
+enum rte_eth_nb_pools {
+	ETH_8_POOLS = 8,    /**< 8 VMDq pools. */
+	ETH_16_POOLS = 16,  /**< 16 VMDq pools. */
+	ETH_32_POOLS = 32,  /**< 32 VMDq pools. */
+	ETH_64_POOLS = 64   /**< 64 VMDq pools. */
+};
+
+/* This structure may be extended in future. */
+struct rte_eth_dcb_rx_conf {
+	enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */
+	/** Traffic class each UP mapped to. */
+	uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES];
+};
+
+struct rte_eth_vmdq_dcb_tx_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */
+	/** Traffic class each UP mapped to. */
+	uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES];
+};
+
+struct rte_eth_dcb_tx_conf {
+	enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */
+	/** Traffic class each UP mapped to. */
+	uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES];
+};
+
+struct rte_eth_vmdq_tx_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */
+};
+
+/**
+ * A structure used to configure the VMDQ+DCB feature
+ * of an Ethernet port.
+ *
+ * Using this feature, packets are routed to a pool of queues, based
+ * on the vlan id in the vlan tag, and then to a specific queue within
+ * that pool, using the user priority vlan tag field.
+ *
+ * A default pool may be used, if desired, to route all traffic which
+ * does not match the vlan filter rules.
+ */
+struct rte_eth_vmdq_dcb_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */
+	uint8_t enable_default_pool; /**< If non-zero, use a default pool */
+	uint8_t default_pool; /**< The default pool, if applicable */
+	uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */
+	struct {
+		uint16_t vlan_id; /**< The vlan id of the received frame */
+		uint64_t pools;   /**< Bitmask of pools for packet rx */
+	} pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */
+	uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES];
+	/**< Selects a queue in a pool */
+};
+
+struct rte_eth_vmdq_rx_conf {
+	enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */
+	uint8_t enable_default_pool; /**< If non-zero, use a default pool */
+	uint8_t default_pool; /**< The default pool, if applicable */
+	uint8_t enable_loop_back; /**< Enable VT loop back */
+	uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */
+	uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */
+	struct {
+		uint16_t vlan_id; /**< The vlan id of the received frame */
+		uint64_t pools;   /**< Bitmask of pools for packet rx */
+	} pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */
+};
+
+/**
+ * A structure used to configure the TX features of an Ethernet port.
+ */
+struct rte_eth_txmode {
+	enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */
+
+	/* For i40e specifically */
+	uint16_t pvid;
+	uint8_t hw_vlan_reject_tagged : 1,
+		/**< If set, reject sending out tagged pkts */
+		hw_vlan_reject_untagged : 1,
+		/**< If set, reject sending out untagged pkts */
+		hw_vlan_insert_pvid : 1;
+		/**< If set, enable port based VLAN insertion */
+};
+
+/**
+ * A structure used to configure an RX ring of an Ethernet port.
+ */
+struct rte_eth_rxconf {
+	struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */
+	uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */
+	uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */
+	uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
+};
+
+#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */
+#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */
+#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */
+#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */
+#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
+#define ETH_TXQ_FLAGS_NOXSUMUDP  0x0400 /**< disable UDP checksum offload */
+#define ETH_TXQ_FLAGS_NOXSUMTCP  0x0800 /**< disable TCP checksum offload */
+#define ETH_TXQ_FLAGS_NOOFFLOADS \
+		(ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
+		 ETH_TXQ_FLAGS_NOXSUMUDP  | ETH_TXQ_FLAGS_NOXSUMTCP)
+#define ETH_TXQ_FLAGS_NOXSUMS \
+		(ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \
+		 ETH_TXQ_FLAGS_NOXSUMTCP)
+/**
+ * A structure used to configure a TX ring of an Ethernet port.
+ */
+struct rte_eth_txconf {
+	struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */
+	uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */
+	uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are
+				      less free descriptors than this value. */
+
+	uint32_t txq_flags; /**< Set flags for the Tx queue */
+	uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
+};
+
+/**
+ * A structure contains information about HW descriptor ring limitations.
+ */
+struct rte_eth_desc_lim {
+	uint16_t nb_max;   /**< Max allowed number of descriptors. */
+	uint16_t nb_min;   /**< Min allowed number of descriptors. */
+	uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+};
+
+/**
+ * This enum indicates the flow control mode
+ */
+enum rte_eth_fc_mode {
+	RTE_FC_NONE = 0, /**< Disable flow control. */
+	RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. */
+	RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */
+	RTE_FC_FULL      /**< Enable flow control on both side. */
+};
+
+/**
+ * A structure used to configure Ethernet flow control parameter.
+ * These parameters will be configured into the register of the NIC.
+ * Please refer to the corresponding data sheet for proper value.
+ */
+struct rte_eth_fc_conf {
+	uint32_t high_water;  /**< High threshold value to trigger XOFF */
+	uint32_t low_water;   /**< Low threshold value to trigger XON */
+	uint16_t pause_time;  /**< Pause quota in the Pause frame */
+	uint16_t send_xon;    /**< Is XON frame need be sent */
+	enum rte_eth_fc_mode mode;  /**< Link flow control mode */
+	uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */
+	uint8_t autoneg;      /**< Use Pause autoneg */
+};
+
+/**
+ * A structure used to configure Ethernet priority flow control parameter.
+ * These parameters will be configured into the register of the NIC.
+ * Please refer to the corresponding data sheet for proper value.
+ */
+struct rte_eth_pfc_conf {
+	struct rte_eth_fc_conf fc; /**< General flow control parameter. */
+	uint8_t priority;          /**< VLAN User Priority. */
+};
+
+/**
+ *  Memory space that can be configured to store Flow Director filters
+ *  in the board memory.
+ */
+enum rte_fdir_pballoc_type {
+	RTE_FDIR_PBALLOC_64K = 0,  /**< 64k. */
+	RTE_FDIR_PBALLOC_128K,     /**< 128k. */
+	RTE_FDIR_PBALLOC_256K,     /**< 256k. */
+};
+
+/**
+ *  Select report mode of FDIR hash information in RX descriptors.
+ */
+enum rte_fdir_status_mode {
+	RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */
+	RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */
+	RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */
+};
+
+/**
+ * A structure used to configure the Flow Director (FDIR) feature
+ * of an Ethernet port.
+ *
+ * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored.
+ */
+struct rte_fdir_conf {
+	enum rte_fdir_mode mode; /**< Flow Director mode. */
+	enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */
+	enum rte_fdir_status_mode status;  /**< How to report FDIR hash. */
+	/** RX queue of packets matching a "drop" filter in perfect mode. */
+	uint8_t drop_queue;
+	struct rte_eth_fdir_masks mask;
+	struct rte_eth_fdir_flex_conf flex_conf;
+	/**< Flex payload configuration. */
+};
+
+/**
+ * UDP tunneling configuration.
+ * Used to config the UDP port for a type of tunnel.
+ * NICs need the UDP port to identify the tunnel type.
+ * Normally a type of tunnel has a default UDP port, this structure can be used
+ * in case if the users want to change or support more UDP port.
+ */
+struct rte_eth_udp_tunnel {
+	uint16_t udp_port; /**< UDP port used for the tunnel. */
+	uint8_t prot_type; /**< Tunnel type. Defined in rte_eth_tunnel_type. */
+};
+
+/**
+ * A structure used to enable/disable specific device interrupts.
+ */
+struct rte_intr_conf {
+	/** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */
+	uint16_t lsc;
+	/** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */
+	uint16_t rxq;
+};
+
+/**
+ * A structure used to configure an Ethernet port.
+ * Depending upon the RX multi-queue mode, extra advanced
+ * configuration settings may be needed.
+ */
+struct rte_eth_conf {
+	uint32_t link_speeds; /**< bitmap of ETH_LINK_SPEED_XXX of speeds to be
+				used. ETH_LINK_SPEED_FIXED disables link
+				autonegotiation, and a unique speed shall be
+				set. Otherwise, the bitmap defines the set of
+				speeds to be advertised. If the special value
+				ETH_LINK_SPEED_AUTONEG (0) is used, all speeds
+				supported are advertised. */
+	struct rte_eth_rxmode rxmode; /**< Port RX configuration. */
+	struct rte_eth_txmode txmode; /**< Port TX configuration. */
+	uint32_t lpbk_mode; /**< Loopback operation mode. By default the value
+			         is 0, meaning the loopback mode is disabled.
+				 Read the datasheet of given ethernet controller
+				 for details. The possible values of this field
+				 are defined in implementation of each driver. */
+	struct {
+		struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */
+		struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf;
+		/**< Port vmdq+dcb configuration. */
+		struct rte_eth_dcb_rx_conf dcb_rx_conf;
+		/**< Port dcb RX configuration. */
+		struct rte_eth_vmdq_rx_conf vmdq_rx_conf;
+		/**< Port vmdq RX configuration. */
+	} rx_adv_conf; /**< Port RX filtering configuration (union). */
+	union {
+		struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf;
+		/**< Port vmdq+dcb TX configuration. */
+		struct rte_eth_dcb_tx_conf dcb_tx_conf;
+		/**< Port dcb TX configuration. */
+		struct rte_eth_vmdq_tx_conf vmdq_tx_conf;
+		/**< Port vmdq TX configuration. */
+	} tx_adv_conf; /**< Port TX DCB configuration (union). */
+	/** Currently,Priority Flow Control(PFC) are supported,if DCB with PFC
+	    is needed,and the variable must be set ETH_DCB_PFC_SUPPORT. */
+	uint32_t dcb_capability_en;
+	struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */
+	struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */
+};
+
+/**
+ * A structure used to retrieve the contextual information of
+ * an Ethernet device, such as the controlling driver of the device,
+ * its PCI context, etc...
+ */
+
+/**
+ * RX offload capabilities of a device.
+ */
+#define DEV_RX_OFFLOAD_VLAN_STRIP  0x00000001
+#define DEV_RX_OFFLOAD_IPV4_CKSUM  0x00000002
+#define DEV_RX_OFFLOAD_UDP_CKSUM   0x00000004
+#define DEV_RX_OFFLOAD_TCP_CKSUM   0x00000008
+#define DEV_RX_OFFLOAD_TCP_LRO     0x00000010
+#define DEV_RX_OFFLOAD_QINQ_STRIP  0x00000020
+#define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040
+
+/**
+ * TX offload capabilities of a device.
+ */
+#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001
+#define DEV_TX_OFFLOAD_IPV4_CKSUM  0x00000002
+#define DEV_TX_OFFLOAD_UDP_CKSUM   0x00000004
+#define DEV_TX_OFFLOAD_TCP_CKSUM   0x00000008
+#define DEV_TX_OFFLOAD_SCTP_CKSUM  0x00000010
+#define DEV_TX_OFFLOAD_TCP_TSO     0x00000020
+#define DEV_TX_OFFLOAD_UDP_TSO     0x00000040
+#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100
+
+/**
+ * Ethernet device information
+ */
+struct rte_eth_dev_info {
+	struct rte_pci_device *pci_dev; /**< Device PCI information. */
+	const char *driver_name; /**< Device Driver name. */
+	unsigned int if_index; /**< Index to bound host interface, or 0 if none.
+		Use if_indextoname() to translate into an interface name. */
+	uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */
+	uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */
+	uint16_t max_rx_queues; /**< Maximum number of RX queues. */
+	uint16_t max_tx_queues; /**< Maximum number of TX queues. */
+	uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */
+	uint32_t max_hash_mac_addrs;
+	/** Maximum number of hash MAC addresses for MTA and UTA. */
+	uint16_t max_vfs; /**< Maximum number of VFs. */
+	uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */
+	uint32_t rx_offload_capa; /**< Device RX offload capabilities. */
+	uint32_t tx_offload_capa; /**< Device TX offload capabilities. */
+	uint16_t reta_size;
+	/**< Device redirection table size, the total number of entries. */
+	uint8_t hash_key_size; /**< Hash key size in bytes */
+	/** Bit mask of RSS offloads, the bit offset also means flow type */
+	uint64_t flow_type_rss_offloads;
+	struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */
+	struct rte_eth_txconf default_txconf; /**< Default TX configuration */
+	uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */
+	uint16_t vmdq_queue_num;  /**< Queue number for VMDQ pools. */
+	uint16_t vmdq_pool_base;  /**< First ID of VMDQ pools. */
+	struct rte_eth_desc_lim rx_desc_lim;  /**< RX descriptors limits */
+	struct rte_eth_desc_lim tx_desc_lim;  /**< TX descriptors limits */
+	uint32_t speed_capa;  /**< Supported speeds bitmap (ETH_LINK_SPEED_). */
+};
+
+/**
+ * Ethernet device RX queue information structure.
+ * Used to retieve information about configured queue.
+ */
+struct rte_eth_rxq_info {
+	struct rte_mempool *mp;     /**< mempool used by that queue. */
+	struct rte_eth_rxconf conf; /**< queue config parameters. */
+	uint8_t scattered_rx;       /**< scattered packets RX supported. */
+	uint16_t nb_desc;           /**< configured number of RXDs. */
+} __rte_cache_min_aligned;
+
+/**
+ * Ethernet device TX queue information structure.
+ * Used to retieve information about configured queue.
+ */
+struct rte_eth_txq_info {
+	struct rte_eth_txconf conf; /**< queue config parameters. */
+	uint16_t nb_desc;           /**< configured number of TXDs. */
+} __rte_cache_min_aligned;
+
+/** Maximum name length for extended statistics counters */
+#define RTE_ETH_XSTATS_NAME_SIZE 64
+
+/**
+ * An Ethernet device extended statistic structure
+ *
+ * This structure is used by ethdev->eth_xstats_get() to provide
+ * statistics that are not provided in the generic rte_eth_stats
+ * structure.
+ */
+struct rte_eth_xstats {
+	char name[RTE_ETH_XSTATS_NAME_SIZE];
+	uint64_t value;
+};
+
+#define ETH_DCB_NUM_TCS    8
+#define ETH_MAX_VMDQ_POOL  64
+
+/**
+ * A structure used to get the information of queue and
+ * TC mapping on both TX and RX paths.
+ */
+struct rte_eth_dcb_tc_queue_mapping {
+	/** rx queues assigned to tc per Pool */
+	struct {
+		uint8_t base;
+		uint8_t nb_queue;
+	} tc_rxq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS];
+	/** rx queues assigned to tc per Pool */
+	struct {
+		uint8_t base;
+		uint8_t nb_queue;
+	} tc_txq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS];
+};
+
+/**
+ * A structure used to get the information of DCB.
+ * It includes TC UP mapping and queue TC mapping.
+ */
+struct rte_eth_dcb_info {
+	uint8_t nb_tcs;        /**< number of TCs */
+	uint8_t prio_tc[ETH_DCB_NUM_USER_PRIORITIES]; /**< Priority to tc */
+	uint8_t tc_bws[ETH_DCB_NUM_TCS]; /**< TX BW percentage for each TC */
+	/** rx queues assigned to tc */
+	struct rte_eth_dcb_tc_queue_mapping tc_queue;
+};
+
+/**
+ * RX/TX queue states
+ */
+#define RTE_ETH_QUEUE_STATE_STOPPED 0
+#define RTE_ETH_QUEUE_STATE_STARTED 1
+
+struct rte_eth_dev;
+
+struct rte_eth_dev_callback;
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback);
+
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+#define RTE_PMD_DEBUG_TRACE(...) \
+	rte_pmd_debug_trace(__func__, __VA_ARGS__)
+#else
+#define RTE_PMD_DEBUG_TRACE(...)
+#endif
+
+
+/* Macros to check for valid port */
+#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \
+	if (!rte_eth_dev_is_valid_port(port_id)) { \
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+		return retval; \
+	} \
+} while (0)
+
+#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \
+	if (!rte_eth_dev_is_valid_port(port_id)) { \
+		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+		return; \
+	} \
+} while (0)
+
+/**
+ * l2 tunnel configuration.
+ */
+
+/**< l2 tunnel enable mask */
+#define ETH_L2_TUNNEL_ENABLE_MASK       0x00000001
+/**< l2 tunnel insertion mask */
+#define ETH_L2_TUNNEL_INSERTION_MASK    0x00000002
+/**< l2 tunnel stripping mask */
+#define ETH_L2_TUNNEL_STRIPPING_MASK    0x00000004
+/**< l2 tunnel forwarding mask */
+#define ETH_L2_TUNNEL_FORWARDING_MASK   0x00000008
+
+/*
+ * Definitions of all functions exported by an Ethernet driver through the
+ * the generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev*
+ * structure associated with an Ethernet device.
+ */
+
+typedef int  (*eth_dev_configure_t)(struct rte_eth_dev *dev);
+/**< @internal Ethernet device configuration. */
+
+typedef int  (*eth_dev_start_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to start a configured Ethernet device. */
+
+typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to stop a configured Ethernet device. */
+
+typedef int  (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to link up a configured Ethernet device. */
+
+typedef int  (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to link down a configured Ethernet device. */
+
+typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to close a configured Ethernet device. */
+
+typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */
+
+typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */
+
+typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev);
+/**< @internal Enable the receipt of all multicast packets by an Ethernet device. */
+
+typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev);
+/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */
+
+typedef int (*eth_link_update_t)(struct rte_eth_dev *dev,
+				int wait_to_complete);
+/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */
+
+typedef void (*eth_stats_get_t)(struct rte_eth_dev *dev,
+				struct rte_eth_stats *igb_stats);
+/**< @internal Get global I/O statistics of an Ethernet device. */
+
+typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev);
+/**< @internal Reset global I/O statistics of an Ethernet device to 0. */
+
+typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev,
+	struct rte_eth_xstats *stats, unsigned n);
+/**< @internal Get extended stats of an Ethernet device. */
+
+typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev);
+/**< @internal Reset extended stats of an Ethernet device. */
+
+typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev,
+					     uint16_t queue_id,
+					     uint8_t stat_idx,
+					     uint8_t is_rx);
+/**< @internal Set a queue statistics mapping for a tx/rx queue of an Ethernet device. */
+
+typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev,
+				    struct rte_eth_dev_info *dev_info);
+/**< @internal Get specific informations of an Ethernet device. */
+
+typedef const uint32_t *(*eth_dev_supported_ptypes_get_t)(struct rte_eth_dev *dev);
+/**< @internal Get supported ptypes of an Ethernet device. */
+
+typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev,
+				    uint16_t queue_id);
+/**< @internal Start rx and tx of a queue of an Ethernet device. */
+
+typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev,
+				    uint16_t queue_id);
+/**< @internal Stop rx and tx of a queue of an Ethernet device. */
+
+typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id,
+				    uint16_t nb_rx_desc,
+				    unsigned int socket_id,
+				    const struct rte_eth_rxconf *rx_conf,
+				    struct rte_mempool *mb_pool);
+/**< @internal Set up a receive queue of an Ethernet device. */
+
+typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
+				    uint16_t tx_queue_id,
+				    uint16_t nb_tx_desc,
+				    unsigned int socket_id,
+				    const struct rte_eth_txconf *tx_conf);
+/**< @internal Setup a transmit queue of an Ethernet device. */
+
+typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Enable interrupt of a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev,
+				    uint16_t rx_queue_id);
+/**< @internal Disable interrupt of a receive queue of an Ethernet device. */
+
+typedef void (*eth_queue_release_t)(void *queue);
+/**< @internal Release memory resources allocated by given RX/TX queue. */
+
+typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev,
+					 uint16_t rx_queue_id);
+/**< @internal Get number of available descriptors on a receive queue of an Ethernet device. */
+
+typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset);
+/**< @internal Check DD bit of specific RX descriptor */
+
+typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
+	uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo);
+
+typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
+	uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);
+
+typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu);
+/**< @internal Set MTU. */
+
+typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev,
+				  uint16_t vlan_id,
+				  int on);
+/**< @internal filtering of a VLAN Tag Identifier by an Ethernet device. */
+
+typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev,
+			       enum rte_vlan_type type, uint16_t tpid);
+/**< @internal set the outer VLAN-TPID by an Ethernet device. */
+
+typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask);
+/**< @internal set VLAN offload function by an Ethernet device. */
+
+typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev,
+			       uint16_t vlan_id,
+			       int on);
+/**< @internal set port based TX VLAN insertion by an Ethernet device. */
+
+typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev,
+				  uint16_t rx_queue_id,
+				  int on);
+/**< @internal VLAN stripping enable/disable by an queue of Ethernet device. */
+
+typedef uint16_t (*eth_rx_burst_t)(void *rxq,
+				   struct rte_mbuf **rx_pkts,
+				   uint16_t nb_pkts);
+/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */
+
+typedef uint16_t (*eth_tx_burst_t)(void *txq,
+				   struct rte_mbuf **tx_pkts,
+				   uint16_t nb_pkts);
+/**< @internal Send output packets on a transmit queue of an Ethernet device. */
+
+typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
+			       struct rte_eth_fc_conf *fc_conf);
+/**< @internal Get current flow control parameter on an Ethernet device */
+
+typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev,
+			       struct rte_eth_fc_conf *fc_conf);
+/**< @internal Setup flow control parameter on an Ethernet device */
+
+typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev,
+				struct rte_eth_pfc_conf *pfc_conf);
+/**< @internal Setup priority flow control parameter on an Ethernet device */
+
+typedef int (*reta_update_t)(struct rte_eth_dev *dev,
+			     struct rte_eth_rss_reta_entry64 *reta_conf,
+			     uint16_t reta_size);
+/**< @internal Update RSS redirection table on an Ethernet device */
+
+typedef int (*reta_query_t)(struct rte_eth_dev *dev,
+			    struct rte_eth_rss_reta_entry64 *reta_conf,
+			    uint16_t reta_size);
+/**< @internal Query RSS redirection table on an Ethernet device */
+
+typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev,
+				 struct rte_eth_rss_conf *rss_conf);
+/**< @internal Update RSS hash configuration of an Ethernet device */
+
+typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev,
+				   struct rte_eth_rss_conf *rss_conf);
+/**< @internal Get current RSS hash configuration of an Ethernet device */
+
+typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev);
+/**< @internal Turn on SW controllable LED on an Ethernet device */
+
+typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev);
+/**< @internal Turn off SW controllable LED on an Ethernet device */
+
+typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index);
+/**< @internal Remove MAC address from receive address register */
+
+typedef void (*eth_mac_addr_add_t)(struct rte_eth_dev *dev,
+				  struct ether_addr *mac_addr,
+				  uint32_t index,
+				  uint32_t vmdq);
+/**< @internal Set a MAC address into Receive Address Address Register */
+
+typedef void (*eth_mac_addr_set_t)(struct rte_eth_dev *dev,
+				  struct ether_addr *mac_addr);
+/**< @internal Set a MAC address into Receive Address Address Register */
+
+typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev,
+				  struct ether_addr *mac_addr,
+				  uint8_t on);
+/**< @internal Set a Unicast Hash bitmap */
+
+typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev,
+				  uint8_t on);
+/**< @internal Set all Unicast Hash bitmap */
+
+typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev,
+				  uint16_t vf,
+				  uint16_t rx_mode,
+				  uint8_t on);
+/**< @internal Set a VF receive mode */
+
+typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint8_t on);
+/**< @internal Set a VF receive  mode */
+
+typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint8_t on);
+/**< @internal Enable or disable a VF transmit   */
+
+typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev,
+				  uint16_t vlan,
+				  uint64_t vf_mask,
+				  uint8_t vlan_on);
+/**< @internal Set VF VLAN pool filter */
+
+typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev,
+				uint16_t queue_idx,
+				uint16_t tx_rate);
+/**< @internal Set queue TX rate */
+
+typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint16_t tx_rate,
+				uint64_t q_msk);
+/**< @internal Set VF TX rate */
+
+typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev,
+				  struct rte_eth_mirror_conf *mirror_conf,
+				  uint8_t rule_id,
+				  uint8_t on);
+/**< @internal Add a traffic mirroring rule on an Ethernet device */
+
+typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev,
+				  uint8_t rule_id);
+/**< @internal Remove a traffic mirroring rule on an Ethernet device */
+
+typedef int (*eth_udp_tunnel_port_add_t)(struct rte_eth_dev *dev,
+					 struct rte_eth_udp_tunnel *tunnel_udp);
+/**< @internal Add tunneling UDP port */
+
+typedef int (*eth_udp_tunnel_port_del_t)(struct rte_eth_dev *dev,
+					 struct rte_eth_udp_tunnel *tunnel_udp);
+/**< @internal Delete tunneling UDP port */
+
+typedef int (*eth_set_mc_addr_list_t)(struct rte_eth_dev *dev,
+				      struct ether_addr *mc_addr_set,
+				      uint32_t nb_mc_addr);
+/**< @internal set the list of multicast addresses on an Ethernet device */
+
+typedef int (*eth_timesync_enable_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to enable IEEE1588/802.1AS timestamping. */
+
+typedef int (*eth_timesync_disable_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to disable IEEE1588/802.1AS timestamping. */
+
+typedef int (*eth_timesync_read_rx_timestamp_t)(struct rte_eth_dev *dev,
+						struct timespec *timestamp,
+						uint32_t flags);
+/**< @internal Function used to read an RX IEEE1588/802.1AS timestamp. */
+
+typedef int (*eth_timesync_read_tx_timestamp_t)(struct rte_eth_dev *dev,
+						struct timespec *timestamp);
+/**< @internal Function used to read a TX IEEE1588/802.1AS timestamp. */
+
+typedef int (*eth_timesync_adjust_time)(struct rte_eth_dev *dev, int64_t);
+/**< @internal Function used to adjust the device clock */
+
+typedef int (*eth_timesync_read_time)(struct rte_eth_dev *dev,
+				      struct timespec *timestamp);
+/**< @internal Function used to get time from the device clock. */
+
+typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev,
+				       const struct timespec *timestamp);
+/**< @internal Function used to get time from the device clock */
+
+typedef int (*eth_get_reg_length_t)(struct rte_eth_dev *dev);
+/**< @internal Retrieve device register count  */
+
+typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev,
+				struct rte_dev_reg_info *info);
+/**< @internal Retrieve registers  */
+
+typedef int (*eth_get_eeprom_length_t)(struct rte_eth_dev *dev);
+/**< @internal Retrieve eeprom size  */
+
+typedef int (*eth_get_eeprom_t)(struct rte_eth_dev *dev,
+				struct rte_dev_eeprom_info *info);
+/**< @internal Retrieve eeprom data  */
+
+typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev,
+				struct rte_dev_eeprom_info *info);
+/**< @internal Program eeprom data  */
+
+typedef int (*eth_l2_tunnel_eth_type_conf_t)
+	(struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel);
+/**< @internal config l2 tunnel ether type */
+
+typedef int (*eth_l2_tunnel_offload_set_t)
+	(struct rte_eth_dev *dev,
+	 struct rte_eth_l2_tunnel_conf *l2_tunnel,
+	 uint32_t mask,
+	 uint8_t en);
+/**< @internal enable/disable the l2 tunnel offload functions */
+
+#ifdef RTE_NIC_BYPASS
+
+enum {
+	RTE_BYPASS_MODE_NONE,
+	RTE_BYPASS_MODE_NORMAL,
+	RTE_BYPASS_MODE_BYPASS,
+	RTE_BYPASS_MODE_ISOLATE,
+	RTE_BYPASS_MODE_NUM,
+};
+
+#define	RTE_BYPASS_MODE_VALID(x)	\
+	((x) > RTE_BYPASS_MODE_NONE && (x) < RTE_BYPASS_MODE_NUM)
+
+enum {
+	RTE_BYPASS_EVENT_NONE,
+	RTE_BYPASS_EVENT_START,
+	RTE_BYPASS_EVENT_OS_ON = RTE_BYPASS_EVENT_START,
+	RTE_BYPASS_EVENT_POWER_ON,
+	RTE_BYPASS_EVENT_OS_OFF,
+	RTE_BYPASS_EVENT_POWER_OFF,
+	RTE_BYPASS_EVENT_TIMEOUT,
+	RTE_BYPASS_EVENT_NUM
+};
+
+#define	RTE_BYPASS_EVENT_VALID(x)	\
+	((x) > RTE_BYPASS_EVENT_NONE && (x) < RTE_BYPASS_MODE_NUM)
+
+enum {
+	RTE_BYPASS_TMT_OFF,     /* timeout disabled. */
+	RTE_BYPASS_TMT_1_5_SEC, /* timeout for 1.5 seconds */
+	RTE_BYPASS_TMT_2_SEC,   /* timeout for 2 seconds */
+	RTE_BYPASS_TMT_3_SEC,   /* timeout for 3 seconds */
+	RTE_BYPASS_TMT_4_SEC,   /* timeout for 4 seconds */
+	RTE_BYPASS_TMT_8_SEC,   /* timeout for 8 seconds */
+	RTE_BYPASS_TMT_16_SEC,  /* timeout for 16 seconds */
+	RTE_BYPASS_TMT_32_SEC,  /* timeout for 32 seconds */
+	RTE_BYPASS_TMT_NUM
+};
+
+#define	RTE_BYPASS_TMT_VALID(x)	\
+	((x) == RTE_BYPASS_TMT_OFF || \
+	((x) > RTE_BYPASS_TMT_OFF && (x) < RTE_BYPASS_TMT_NUM))
+
+typedef void (*bypass_init_t)(struct rte_eth_dev *dev);
+typedef int32_t (*bypass_state_set_t)(struct rte_eth_dev *dev, uint32_t *new_state);
+typedef int32_t (*bypass_state_show_t)(struct rte_eth_dev *dev, uint32_t *state);
+typedef int32_t (*bypass_event_set_t)(struct rte_eth_dev *dev, uint32_t state, uint32_t event);
+typedef int32_t (*bypass_event_show_t)(struct rte_eth_dev *dev, uint32_t event_shift, uint32_t *event);
+typedef int32_t (*bypass_wd_timeout_set_t)(struct rte_eth_dev *dev, uint32_t timeout);
+typedef int32_t (*bypass_wd_timeout_show_t)(struct rte_eth_dev *dev, uint32_t *wd_timeout);
+typedef int32_t (*bypass_ver_show_t)(struct rte_eth_dev *dev, uint32_t *ver);
+typedef int32_t (*bypass_wd_reset_t)(struct rte_eth_dev *dev);
+#endif
+
+typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev,
+				 enum rte_filter_type filter_type,
+				 enum rte_filter_op filter_op,
+				 void *arg);
+/**< @internal Take operations to assigned filter type on an Ethernet device */
+
+typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev,
+				 struct rte_eth_dcb_info *dcb_info);
+/**< @internal Get dcb information on an Ethernet device */
+
+/**
+ * @internal A structure containing the functions exported by an Ethernet driver.
+ */
+struct eth_dev_ops {
+	eth_dev_configure_t        dev_configure; /**< Configure device. */
+	eth_dev_start_t            dev_start;     /**< Start device. */
+	eth_dev_stop_t             dev_stop;      /**< Stop device. */
+	eth_dev_set_link_up_t      dev_set_link_up;   /**< Device link up. */
+	eth_dev_set_link_down_t    dev_set_link_down; /**< Device link down. */
+	eth_dev_close_t            dev_close;     /**< Close device. */
+	eth_promiscuous_enable_t   promiscuous_enable; /**< Promiscuous ON. */
+	eth_promiscuous_disable_t  promiscuous_disable;/**< Promiscuous OFF. */
+	eth_allmulticast_enable_t  allmulticast_enable;/**< RX multicast ON. */
+	eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */
+	eth_link_update_t          link_update;   /**< Get device link state. */
+	eth_stats_get_t            stats_get;     /**< Get generic device statistics. */
+	eth_stats_reset_t          stats_reset;   /**< Reset generic device statistics. */
+	eth_xstats_get_t           xstats_get;    /**< Get extended device statistics. */
+	eth_xstats_reset_t         xstats_reset;  /**< Reset extended device statistics. */
+	eth_queue_stats_mapping_set_t queue_stats_mapping_set;
+	/**< Configure per queue stat counter mapping. */
+	eth_dev_infos_get_t        dev_infos_get; /**< Get device info. */
+	eth_dev_supported_ptypes_get_t dev_supported_ptypes_get;
+	/**< Get packet types supported and identified by device*/
+	mtu_set_t                  mtu_set; /**< Set MTU. */
+	vlan_filter_set_t          vlan_filter_set;  /**< Filter VLAN Setup. */
+	vlan_tpid_set_t            vlan_tpid_set;      /**< Outer VLAN TPID Setup. */
+	vlan_strip_queue_set_t     vlan_strip_queue_set; /**< VLAN Stripping on queue. */
+	vlan_offload_set_t         vlan_offload_set; /**< Set VLAN Offload. */
+	vlan_pvid_set_t            vlan_pvid_set; /**< Set port based TX VLAN insertion */
+	eth_queue_start_t          rx_queue_start;/**< Start RX for a queue.*/
+	eth_queue_stop_t           rx_queue_stop;/**< Stop RX for a queue.*/
+	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue.*/
+	eth_queue_stop_t           tx_queue_stop;/**< Stop TX for a queue.*/
+	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue.*/
+	eth_queue_release_t        rx_queue_release;/**< Release RX queue.*/
+	eth_rx_queue_count_t       rx_queue_count; /**< Get Rx queue count. */
+	eth_rx_descriptor_done_t   rx_descriptor_done;  /**< Check rxd DD bit */
+	/**< Enable Rx queue interrupt. */
+	eth_rx_enable_intr_t       rx_queue_intr_enable;
+	/**< Disable Rx queue interrupt.*/
+	eth_rx_disable_intr_t      rx_queue_intr_disable;
+	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue.*/
+	eth_queue_release_t        tx_queue_release;/**< Release TX queue.*/
+	eth_dev_led_on_t           dev_led_on;    /**< Turn on LED. */
+	eth_dev_led_off_t          dev_led_off;   /**< Turn off LED. */
+	flow_ctrl_get_t            flow_ctrl_get; /**< Get flow control. */
+	flow_ctrl_set_t            flow_ctrl_set; /**< Setup flow control. */
+	priority_flow_ctrl_set_t   priority_flow_ctrl_set; /**< Setup priority flow control.*/
+	eth_mac_addr_remove_t      mac_addr_remove; /**< Remove MAC address */
+	eth_mac_addr_add_t         mac_addr_add;  /**< Add a MAC address */
+	eth_mac_addr_set_t         mac_addr_set;  /**< Set a MAC address */
+	eth_uc_hash_table_set_t    uc_hash_table_set;  /**< Set Unicast Table Array */
+	eth_uc_all_hash_table_set_t uc_all_hash_table_set;  /**< Set Unicast hash bitmap */
+	eth_mirror_rule_set_t	   mirror_rule_set;  /**< Add a traffic mirror rule.*/
+	eth_mirror_rule_reset_t	   mirror_rule_reset;  /**< reset a traffic mirror rule.*/
+	eth_set_vf_rx_mode_t       set_vf_rx_mode;   /**< Set VF RX mode */
+	eth_set_vf_rx_t            set_vf_rx;  /**< enable/disable a VF receive */
+	eth_set_vf_tx_t            set_vf_tx;  /**< enable/disable a VF transmit */
+	eth_set_vf_vlan_filter_t   set_vf_vlan_filter;  /**< Set VF VLAN filter */
+	/** Add UDP tunnel port. */
+	eth_udp_tunnel_port_add_t udp_tunnel_port_add;
+	/** Del UDP tunnel port. */
+	eth_udp_tunnel_port_del_t udp_tunnel_port_del;
+	eth_set_queue_rate_limit_t set_queue_rate_limit;   /**< Set queue rate limit */
+	eth_set_vf_rate_limit_t    set_vf_rate_limit;   /**< Set VF rate limit */
+	/** Update redirection table. */
+	reta_update_t reta_update;
+	/** Query redirection table. */
+	reta_query_t reta_query;
+
+	eth_get_reg_length_t get_reg_length;
+	/**< Get # of registers */
+	eth_get_reg_t get_reg;
+	/**< Get registers */
+	eth_get_eeprom_length_t get_eeprom_length;
+	/**< Get eeprom length */
+	eth_get_eeprom_t get_eeprom;
+	/**< Get eeprom data */
+	eth_set_eeprom_t set_eeprom;
+	/**< Set eeprom */
+  /* bypass control */
+#ifdef RTE_NIC_BYPASS
+  bypass_init_t bypass_init;
+  bypass_state_set_t bypass_state_set;
+  bypass_state_show_t bypass_state_show;
+  bypass_event_set_t bypass_event_set;
+  bypass_event_show_t bypass_event_show;
+  bypass_wd_timeout_set_t bypass_wd_timeout_set;
+  bypass_wd_timeout_show_t bypass_wd_timeout_show;
+  bypass_ver_show_t bypass_ver_show;
+  bypass_wd_reset_t bypass_wd_reset;
+#endif
+
+	/** Configure RSS hash protocols. */
+	rss_hash_update_t rss_hash_update;
+	/** Get current RSS hash configuration. */
+	rss_hash_conf_get_t rss_hash_conf_get;
+	eth_filter_ctrl_t              filter_ctrl;
+	/**< common filter control. */
+	eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */
+	eth_rxq_info_get_t rxq_info_get;
+	/**< retrieve RX queue information. */
+	eth_txq_info_get_t txq_info_get;
+	/**< retrieve TX queue information. */
+	/** Turn IEEE1588/802.1AS timestamping on. */
+	eth_timesync_enable_t timesync_enable;
+	/** Turn IEEE1588/802.1AS timestamping off. */
+	eth_timesync_disable_t timesync_disable;
+	/** Read the IEEE1588/802.1AS RX timestamp. */
+	eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp;
+	/** Read the IEEE1588/802.1AS TX timestamp. */
+	eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp;
+
+	/** Get DCB information */
+	eth_get_dcb_info get_dcb_info;
+	/** Adjust the device clock.*/
+	eth_timesync_adjust_time timesync_adjust_time;
+	/** Get the device clock time. */
+	eth_timesync_read_time timesync_read_time;
+	/** Set the device clock time. */
+	eth_timesync_write_time timesync_write_time;
+	/** Config ether type of l2 tunnel */
+	eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf;
+	/** Enable/disable l2 tunnel offload functions */
+	eth_l2_tunnel_offload_set_t l2_tunnel_offload_set;
+};
+
+/**
+ * Function type used for RX packet processing packet callbacks.
+ *
+ * The callback function is called on RX with a burst of packets that have
+ * been received on the given port and queue.
+ *
+ * @param port
+ *   The Ethernet port on which RX is being performed.
+ * @param queue
+ *   The queue on the Ethernet port which is being used to receive the packets.
+ * @param pkts
+ *   The burst of packets that have just been received.
+ * @param nb_pkts
+ *   The number of packets in the burst pointed to by "pkts".
+ * @param max_pkts
+ *   The max number of packets that can be stored in the "pkts" array.
+ * @param user_param
+ *   The arbitrary user parameter passed in by the application when the callback
+ *   was originally configured.
+ * @return
+ *   The number of packets returned to the user.
+ */
+typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue,
+	struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts,
+	void *user_param);
+
+/**
+ * Function type used for TX packet processing packet callbacks.
+ *
+ * The callback function is called on TX with a burst of packets immediately
+ * before the packets are put onto the hardware queue for transmission.
+ *
+ * @param port
+ *   The Ethernet port on which TX is being performed.
+ * @param queue
+ *   The queue on the Ethernet port which is being used to transmit the packets.
+ * @param pkts
+ *   The burst of packets that are about to be transmitted.
+ * @param nb_pkts
+ *   The number of packets in the burst pointed to by "pkts".
+ * @param user_param
+ *   The arbitrary user parameter passed in by the application when the callback
+ *   was originally configured.
+ * @return
+ *   The number of packets to be written to the NIC.
+ */
+typedef uint16_t (*rte_tx_callback_fn)(uint8_t port, uint16_t queue,
+	struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
+
+/**
+ * @internal
+ * Structure used to hold information about the callbacks to be called for a
+ * queue on RX and TX.
+ */
+struct rte_eth_rxtx_callback {
+	struct rte_eth_rxtx_callback *next;
+	union{
+		rte_rx_callback_fn rx;
+		rte_tx_callback_fn tx;
+	} fn;
+	void *param;
+};
+
+/**
+ * The eth device type.
+ */
+enum rte_eth_dev_type {
+	RTE_ETH_DEV_UNKNOWN,	/**< unknown device type */
+	RTE_ETH_DEV_PCI,
+		/**< Physical function and Virtual function of PCI devices */
+	RTE_ETH_DEV_VIRTUAL,	/**< non hardware device */
+	RTE_ETH_DEV_MAX		/**< max value of this enum */
+};
+
+/**
+ * @internal
+ * The generic data structure associated with each ethernet device.
+ *
+ * Pointers to burst-oriented packet receive and transmit functions are
+ * located at the beginning of the structure, along with the pointer to
+ * where all the data elements for the particular device are stored in shared
+ * memory. This split allows the function pointer and driver data to be per-
+ * process, while the actual configuration data for the device is shared.
+ */
+struct rte_eth_dev {
+	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
+	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+	struct rte_eth_dev_data *data;  /**< Pointer to device data */
+	const struct eth_driver *driver;/**< Driver for this device */
+	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
+	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+	/** User application callbacks for NIC interrupts */
+	struct rte_eth_dev_cb_list link_intr_cbs;
+	/**
+	 * User-supplied functions called from rx_burst to post-process
+	 * received packets before passing them to the user
+	 */
+	struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
+	/**
+	 * User-supplied functions called from tx_burst to pre-process
+	 * received packets before passing them to the driver for transmission.
+	 */
+	struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
+	uint8_t attached; /**< Flag indicating the port is attached */
+	enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */
+};
+
+struct rte_eth_dev_sriov {
+	uint8_t active;               /**< SRIOV is active with 16, 32 or 64 pools */
+	uint8_t nb_q_per_pool;        /**< rx queue number per pool */
+	uint16_t def_vmdq_idx;        /**< Default pool num used for PF */
+	uint16_t def_pool_q_idx;      /**< Default pool queue start reg index */
+};
+#define RTE_ETH_DEV_SRIOV(dev)         ((dev)->data->sriov)
+
+#define RTE_ETH_NAME_MAX_LEN (32)
+
+/**
+ * @internal
+ * The data part, with no function pointers, associated with each ethernet device.
+ *
+ * This structure is safe to place in shared memory to be common among different
+ * processes in a multi-process configuration.
+ */
+struct rte_eth_dev_data {
+	char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */
+
+	void **rx_queues; /**< Array of pointers to RX queues. */
+	void **tx_queues; /**< Array of pointers to TX queues. */
+	uint16_t nb_rx_queues; /**< Number of RX queues. */
+	uint16_t nb_tx_queues; /**< Number of TX queues. */
+
+	struct rte_eth_dev_sriov sriov;    /**< SRIOV data */
+
+	void *dev_private;              /**< PMD-specific private data */
+
+	struct rte_eth_link dev_link;
+	/**< Link-level information & status */
+
+	struct rte_eth_conf dev_conf;   /**< Configuration applied to device. */
+	uint16_t mtu;                   /**< Maximum Transmission Unit. */
+
+	uint32_t min_rx_buf_size;
+	/**< Common rx buffer size handled by all queues */
+
+	uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */
+	struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */
+	uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR];
+	/** bitmap array of associating Ethernet MAC addresses to pools */
+	struct ether_addr* hash_mac_addrs;
+	/** Device Ethernet MAC addresses of hash filtering. */
+	uint8_t port_id;           /**< Device [external] port identifier. */
+	uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
+		scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
+		all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
+		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
+		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
+	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
+	/** Queues state: STARTED(1) / STOPPED(0) */
+	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
+	/** Queues state: STARTED(1) / STOPPED(0) */
+	uint32_t dev_flags; /**< Capabilities */
+	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough */
+	int numa_node;  /**< NUMA node connection */
+	const char *drv_name;   /**< Driver name */
+};
+
+/** Device supports hotplug detach */
+#define RTE_ETH_DEV_DETACHABLE   0x0001
+/** Device supports link state interrupt */
+#define RTE_ETH_DEV_INTR_LSC     0x0002
+/** Device is a bonded slave */
+#define RTE_ETH_DEV_BONDED_SLAVE 0x0004
+
+/**
+ * @internal
+ * The pool of *rte_eth_dev* structures. The size of the pool
+ * is configured at compile-time in the <rte_ethdev.c> file.
+ */
+extern struct rte_eth_dev rte_eth_devices[];
+
+/**
+ * Get the total number of Ethernet devices that have been successfully
+ * initialized by the [matching] Ethernet driver during the PCI probing phase.
+ * All devices whose port identifier is in the range
+ * [0,  rte_eth_dev_count() - 1] can be operated on by network applications
+ * immediately after invoking rte_eal_init().
+ * If the application unplugs a port using hotplug function, The enabled port
+ * numbers may be noncontiguous. In the case, the applications need to manage
+ * enabled port by themselves.
+ *
+ * @return
+ *   - The total number of usable Ethernet devices.
+ */
+uint8_t rte_eth_dev_count(void);
+
+/**
+ * @internal
+ * Returns a ethdev slot specified by the unique identifier name.
+ *
+ * @param	name
+ *  The pointer to the Unique identifier name for each Ethernet device
+ * @return
+ *   - The pointer to the ethdev slot, on success. NULL on error
+ */
+struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
+
+/**
+ * @internal
+ * Allocates a new ethdev slot for an ethernet device and returns the pointer
+ * to that slot for the driver to use.
+ *
+ * @param	name	Unique identifier name for each Ethernet device
+ * @param	type	Device type of this Ethernet device
+ * @return
+ *   - Slot in the rte_dev_devices array for a new device;
+ */
+struct rte_eth_dev *rte_eth_dev_allocate(const char *name,
+		enum rte_eth_dev_type type);
+
+/**
+ * @internal
+ * Release the specified ethdev port.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @return
+ *   - 0 on success, negative on error
+ */
+int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
+
+/**
+ * Attach a new Ethernet device specified by aruguments.
+ *
+ * @param devargs
+ *  A pointer to a strings array describing the new device
+ *  to be attached. The strings should be a pci address like
+ *  '0000:01:00.0' or virtual device name like 'eth_pcap0'.
+ * @param port_id
+ *  A pointer to a port identifier actually attached.
+ * @return
+ *  0 on success and port_id is filled, negative on error
+ */
+int rte_eth_dev_attach(const char *devargs, uint8_t *port_id);
+
+/**
+ * Detach a Ethernet device specified by port identifier.
+ * This function must be called when the device is in the
+ * closed state.
+ *
+ * @param port_id
+ *   The port identifier of the device to detach.
+ * @param devname
+ *  A pointer to a device name actually detached.
+ * @return
+ *  0 on success and devname is filled, negative on error
+ */
+int rte_eth_dev_detach(uint8_t port_id, char *devname);
+
+struct eth_driver;
+/**
+ * @internal
+ * Initialization function of an Ethernet driver invoked for each matching
+ * Ethernet PCI device detected during the PCI probing phase.
+ *
+ * @param eth_dev
+ *   The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ *   associated with the matching device and which have been [automatically]
+ *   allocated in the *rte_eth_devices* array.
+ *   The *eth_dev* structure is supplied to the driver initialization function
+ *   with the following fields already initialized:
+ *
+ *   - *pci_dev*: Holds the pointers to the *rte_pci_device* structure which
+ *     contains the generic PCI information of the matching device.
+ *
+ *   - *driver*: Holds the pointer to the *eth_driver* structure.
+ *
+ *   - *dev_private*: Holds a pointer to the device private data structure.
+ *
+ *   - *mtu*: Contains the default Ethernet maximum frame length (1500).
+ *
+ *   - *port_id*: Contains the port index of the device (actually the index
+ *     of the *eth_dev* structure in the *rte_eth_devices* array).
+ *
+ * @return
+ *   - 0: Success, the device is properly initialized by the driver.
+ *        In particular, the driver MUST have set up the *dev_ops* pointer
+ *        of the *eth_dev* structure.
+ *   - <0: Error code of the device initialization failure.
+ */
+typedef int (*eth_dev_init_t)(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
+ * Finalization function of an Ethernet driver invoked for each matching
+ * Ethernet PCI device detected during the PCI closing phase.
+ *
+ * @param eth_dev
+ *   The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ *   associated with the matching device and which have been [automatically]
+ *   allocated in the *rte_eth_devices* array.
+ * @return
+ *   - 0: Success, the device is properly finalized by the driver.
+ *        In particular, the driver MUST free the *dev_ops* pointer
+ *        of the *eth_dev* structure.
+ *   - <0: Error code of the device initialization failure.
+ */
+typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
+
+/**
+ * @internal
+ * The structure associated with a PMD Ethernet driver.
+ *
+ * Each Ethernet driver acts as a PCI driver and is represented by a generic
+ * *eth_driver* structure that holds:
+ *
+ * - An *rte_pci_driver* structure (which must be the first field).
+ *
+ * - The *eth_dev_init* function invoked for each matching PCI device.
+ *
+ * - The *eth_dev_uninit* function invoked for each matching PCI device.
+ *
+ * - The size of the private data to allocate for each matching device.
+ */
+struct eth_driver {
+	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	eth_dev_init_t eth_dev_init;      /**< Device init function. */
+	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
+	unsigned int dev_private_size;    /**< Size of device private data. */
+};
+
+/**
+ * @internal
+ * A function invoked by the initialization function of an Ethernet driver
+ * to simultaneously register itself as a PCI driver and as an Ethernet
+ * Poll Mode Driver (PMD).
+ *
+ * @param eth_drv
+ *   The pointer to the *eth_driver* structure associated with
+ *   the Ethernet driver.
+ */
+void rte_eth_driver_register(struct eth_driver *eth_drv);
+
+/**
+ * Convert a numerical speed in Mbps to a bitmap flag that can be used in
+ * the bitmap link_speeds of the struct rte_eth_conf
+ *
+ * @param speed
+ *   Numerical speed value in Mbps
+ * @param duplex
+ *   ETH_LINK_[HALF/FULL]_DUPLEX (only for 10/100M speeds)
+ * @return
+ *   0 if the speed cannot be mapped
+ */
+uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
+
+/**
+ * Configure an Ethernet device.
+ * This function must be invoked first before any other function in the
+ * Ethernet API. This function can also be re-invoked when a device is in the
+ * stopped state.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device to configure.
+ * @param nb_rx_queue
+ *   The number of receive queues to set up for the Ethernet device.
+ * @param nb_tx_queue
+ *   The number of transmit queues to set up for the Ethernet device.
+ * @param eth_conf
+ *   The pointer to the configuration data to be used for the Ethernet device.
+ *   The *rte_eth_conf* structure includes:
+ *     -  the hardware offload features to activate, with dedicated fields for
+ *        each statically configurable offload hardware feature provided by
+ *        Ethernet devices, such as IP checksum or VLAN tag stripping for
+ *        example.
+ *     - the Receive Side Scaling (RSS) configuration when using multiple RX
+ *         queues per port.
+ *
+ *   Embedding all configuration information in a single data structure
+ *   is the more flexible method that allows the addition of new features
+ *   without changing the syntax of the API.
+ * @return
+ *   - 0: Success, device configured.
+ *   - <0: Error code returned by the driver configuration function.
+ */
+int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue,
+		uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf);
+
+/**
+ * Allocate and set up a receive queue for an Ethernet device.
+ *
+ * The function allocates a contiguous block of memory for *nb_rx_desc*
+ * receive descriptors from a memory zone associated with *socket_id*
+ * and initializes each receive descriptor with a network buffer allocated
+ * from the memory pool *mb_pool*.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue to set up.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_rx_desc
+ *   The number of receive descriptors to allocate for the receive ring.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
+ *   the DMA memory allocated for the receive descriptors of the ring.
+ * @param rx_conf
+ *   The pointer to the configuration data to be used for the receive queue.
+ *   NULL value is allowed, in which case default RX configuration
+ *   will be used.
+ *   The *rx_conf* structure contains an *rx_thresh* structure with the values
+ *   of the Prefetch, Host, and Write-Back threshold registers of the receive
+ *   ring.
+ * @param mb_pool
+ *   The pointer to the memory pool from which to allocate *rte_mbuf* network
+ *   memory buffers to populate each descriptor of the receive ring.
+ * @return
+ *   - 0: Success, receive queue correctly set up.
+ *   - -EINVAL: The size of network buffers which can be allocated from the
+ *      memory pool does not fit the various buffer sizes allowed by the
+ *      device controller.
+ *   - -ENOMEM: Unable to allocate the receive ring descriptors or to
+ *      allocate network memory buffers from the memory pool when
+ *      initializing receive descriptors.
+ */
+int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, unsigned int socket_id,
+		const struct rte_eth_rxconf *rx_conf,
+		struct rte_mempool *mb_pool);
+
+/**
+ * Allocate and set up a transmit queue for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tx_queue_id
+ *   The index of the transmit queue to set up.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_tx_desc
+ *   The number of transmit descriptors to allocate for the transmit ring.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ *   Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
+ *   the DMA memory allocated for the transmit descriptors of the ring.
+ * @param tx_conf
+ *   The pointer to the configuration data to be used for the transmit queue.
+ *   NULL value is allowed, in which case default RX configuration
+ *   will be used.
+ *   The *tx_conf* structure contains the following data:
+ *   - The *tx_thresh* structure with the values of the Prefetch, Host, and
+ *     Write-Back threshold registers of the transmit ring.
+ *     When setting Write-Back threshold to the value greater then zero,
+ *     *tx_rs_thresh* value should be explicitly set to one.
+ *   - The *tx_free_thresh* value indicates the [minimum] number of network
+ *     buffers that must be pending in the transmit ring to trigger their
+ *     [implicit] freeing by the driver transmit function.
+ *   - The *tx_rs_thresh* value indicates the [minimum] number of transmit
+ *     descriptors that must be pending in the transmit ring before setting the
+ *     RS bit on a descriptor by the driver transmit function.
+ *     The *tx_rs_thresh* value should be less or equal then
+ *     *tx_free_thresh* value, and both of them should be less then
+ *     *nb_tx_desc* - 3.
+ *   - The *txq_flags* member contains flags to pass to the TX queue setup
+ *     function to configure the behavior of the TX queue. This should be set
+ *     to 0 if no special configuration is required.
+ *
+ *     Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces
+ *     the transmit function to use default values.
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -ENOMEM: Unable to allocate the transmit ring descriptors.
+ */
+int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, unsigned int socket_id,
+		const struct rte_eth_txconf *tx_conf);
+
+/*
+ * Return the NUMA socket to which an Ethernet device is connected
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @return
+ *   The NUMA socket id to which the Ethernet device is connected or
+ *   a default of zero if the socket could not be determined.
+ *   -1 is returned is the port_id value is out of range.
+ */
+int rte_eth_dev_socket_id(uint8_t port_id);
+
+/*
+ * Check if port_id of device is attached
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @return
+ *   - 0 if port is out of range or not attached
+ *   - 1 if device is attached
+ */
+int rte_eth_dev_is_valid_port(uint8_t port_id);
+
+/*
+ * Allocate mbuf from mempool, setup the DMA physical address
+ * and then start RX for specified queue of a port. It is used
+ * when rx_deferred_start flag of the specified queue is true.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param rx_queue_id
+ *   The index of the rx queue to update the ring.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id);
+
+/*
+ * Stop specified RX queue of a port
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param rx_queue_id
+ *   The index of the rx queue to update the ring.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id);
+
+/*
+ * Start TX for specified queue of a port. It is used when tx_deferred_start
+ * flag of the specified queue is true.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param tx_queue_id
+ *   The index of the tx queue to update the ring.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id);
+
+/*
+ * Stop specified TX queue of a port
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @param tx_queue_id
+ *   The index of the tx queue to update the ring.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - 0: Success, the transmit queue is correctly set up.
+ *   - -EINVAL: The port_id or the queue_id out of range.
+ *   - -ENOTSUP: The function not supported in PMD driver.
+ */
+int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id);
+
+
+
+/**
+ * Start an Ethernet device.
+ *
+ * The device start step is the last one and consists of setting the configured
+ * offload features and in starting the transmit and the receive units of the
+ * device.
+ * On success, all basic functions exported by the Ethernet API (link status,
+ * receive/transmit, and so on) can be invoked.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - 0: Success, Ethernet device started.
+ *   - <0: Error code of the driver device start function.
+ */
+int rte_eth_dev_start(uint8_t port_id);
+
+/**
+ * Stop an Ethernet device. The device can be restarted with a call to
+ * rte_eth_dev_start()
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_dev_stop(uint8_t port_id);
+
+
+/**
+ * Link up an Ethernet device.
+ *
+ * Set device link up will re-enable the device rx/tx
+ * functionality after it is previously set device linked down.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - 0: Success, Ethernet device linked up.
+ *   - <0: Error code of the driver device link up function.
+ */
+int rte_eth_dev_set_link_up(uint8_t port_id);
+
+/**
+ * Link down an Ethernet device.
+ * The device rx/tx functionality will be disabled if success,
+ * and it can be re-enabled with a call to
+ * rte_eth_dev_set_link_up()
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+int rte_eth_dev_set_link_down(uint8_t port_id);
+
+/**
+ * Close a stopped Ethernet device. The device cannot be restarted!
+ * The function frees all resources except for needed by the
+ * closed state. To free these resources, call rte_eth_dev_detach().
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_dev_close(uint8_t port_id);
+
+/**
+ * Enable receipt in promiscuous mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_promiscuous_enable(uint8_t port_id);
+
+/**
+ * Disable receipt in promiscuous mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_promiscuous_disable(uint8_t port_id);
+
+/**
+ * Return the value of promiscuous mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (1) if promiscuous is enabled
+ *   - (0) if promiscuous is disabled.
+ *   - (-1) on error
+ */
+int rte_eth_promiscuous_get(uint8_t port_id);
+
+/**
+ * Enable the receipt of any multicast frame by an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_allmulticast_enable(uint8_t port_id);
+
+/**
+ * Disable the receipt of all multicast frames by an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_allmulticast_disable(uint8_t port_id);
+
+/**
+ * Return the value of allmulticast mode for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (1) if allmulticast is enabled
+ *   - (0) if allmulticast is disabled.
+ *   - (-1) on error
+ */
+int rte_eth_allmulticast_get(uint8_t port_id);
+
+/**
+ * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX
+ * or FULL-DUPLEX) of the physical link of an Ethernet device. It might need
+ * to wait up to 9 seconds in it.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param link
+ *   A pointer to an *rte_eth_link* structure to be filled with
+ *   the status, the speed and the mode of the Ethernet device link.
+ */
+void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link);
+
+/**
+ * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX
+ * or FULL-DUPLEX) of the physical link of an Ethernet device. It is a no-wait
+ * version of rte_eth_link_get().
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param link
+ *   A pointer to an *rte_eth_link* structure to be filled with
+ *   the status, the speed and the mode of the Ethernet device link.
+ */
+void rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *link);
+
+/**
+ * Retrieve the general I/O statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param stats
+ *   A pointer to a structure of type *rte_eth_stats* to be filled with
+ *   the values of device counters for the following set of statistics:
+ *   - *ipackets* with the total of successfully received packets.
+ *   - *opackets* with the total of successfully transmitted packets.
+ *   - *ibytes*   with the total of successfully received bytes.
+ *   - *obytes*   with the total of successfully transmitted bytes.
+ *   - *ierrors*  with the total of erroneous received packets.
+ *   - *oerrors*  with the total of failed transmitted packets.
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats);
+
+/**
+ * Reset the general I/O statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_stats_reset(uint8_t port_id);
+
+/**
+ * Retrieve extended statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param xstats
+ *   A pointer to a table of structure of type *rte_eth_xstats*
+ *   to be filled with device statistics names and values.
+ *   This parameter can be set to NULL if n is 0.
+ * @param n
+ *   The size of the stats table, which should be large enough to store
+ *   all the statistics of the device.
+ * @return
+ *   - positive value lower or equal to n: success. The return value
+ *     is the number of entries filled in the stats table.
+ *   - positive value higher than n: error, the given statistics table
+ *     is too small. The return value corresponds to the size that should
+ *     be given to succeed. The entries in the table are not valid and
+ *     shall not be used by the caller.
+ *   - negative value on error (invalid port id)
+ */
+int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
+		unsigned n);
+
+/**
+ * Reset extended statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ */
+void rte_eth_xstats_reset(uint8_t port_id);
+
+/**
+ *  Set a mapping for the specified transmit queue to the specified per-queue
+ *  statistics counter.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tx_queue_id
+ *   The index of the transmit queue for which a queue stats mapping is required.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param stat_idx
+ *   The per-queue packet statistics functionality number that the transmit
+ *   queue is to be assigned.
+ *   The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id,
+		uint16_t tx_queue_id, uint8_t stat_idx);
+
+/**
+ *  Set a mapping for the specified receive queue to the specified per-queue
+ *  statistics counter.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue for which a queue stats mapping is required.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param stat_idx
+ *   The per-queue packet statistics functionality number that the receive
+ *   queue is to be assigned.
+ *   The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * @return
+ *   Zero if successful. Non-zero otherwise.
+ */
+int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id,
+					   uint16_t rx_queue_id,
+					   uint8_t stat_idx);
+
+/**
+ * Retrieve the Ethernet address of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   A pointer to a structure of type *ether_addr* to be filled with
+ *   the Ethernet address of the Ethernet device.
+ */
+void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr);
+
+/**
+ * Retrieve the contextual information of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param dev_info
+ *   A pointer to a structure of type *rte_eth_dev_info* to be filled with
+ *   the contextual information of the Ethernet device.
+ */
+void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info);
+
+/**
+ * Retrieve the supported packet types of an Ethernet device.
+ *
+ * @note
+ *   Better to invoke this API after the device is already started or rx burst
+ *   function is decided, to obtain correct supported ptypes.
+ * @note
+ *   if a given PMD does not report what ptypes it supports, then the supported
+ *   ptype count is reported as 0.
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param ptype_mask
+ *   A hint of what kind of packet type which the caller is interested in.
+ * @param ptypes
+ *   An array pointer to store adequent packet types, allocated by caller.
+ * @param num
+ *  Size of the array pointed by param ptypes.
+ * @return
+ *   - (>=0) Number of supported ptypes. If the number of types exceeds num,
+ *           only num entries will be filled into the ptypes array, but the full
+ *           count of supported ptypes will be returned.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask,
+				     uint32_t *ptypes, int num);
+
+/**
+ * Retrieve the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A pointer to a uint16_t where the retrieved MTU is to be stored.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
+
+/**
+ * Change the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A uint16_t for the MTU to be applied.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *mtu* invalid.
+ */
+int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu);
+
+/**
+ * Enable/Disable hardware filtering by an Ethernet device of received
+ * VLAN packets tagged with a given VLAN Tag Identifier.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vlan_id
+ *   The VLAN Tag Identifier whose filtering must be enabled or disabled.
+ * @param on
+ *   If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*.
+ *   Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOSUP) if hardware-assisted VLAN filtering not configured.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOSYS) if VLAN filtering on *port_id* disabled.
+ *   - (-EINVAL) if *vlan_id* > 4095.
+ */
+int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on);
+
+/**
+ * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device.
+ * 82599/X540/X550 can support VLAN stripping at the rx queue level
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue for which a queue stats mapping is required.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param on
+ *   If 1, Enable VLAN Stripping of the receive queue of the Ethernet port.
+ *   If 0, Disable VLAN Stripping of the receive queue of the Ethernet port.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOSUP) if hardware-assisted VLAN stripping not configured.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *rx_queue_id* invalid.
+ */
+int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id,
+		int on);
+
+/**
+ * Set the Outer VLAN Ether Type by an Ethernet device, it can be inserted to
+ * the VLAN Header. This is a register setup available on some Intel NIC, not
+ * but all, please check the data sheet for availability.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vlan_type
+ *   The vlan type.
+ * @param tag_type
+ *   The Tag Protocol ID
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOSUP) if hardware-assisted VLAN TPID setup is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
+				    enum rte_vlan_type vlan_type,
+				    uint16_t tag_type);
+
+/**
+ * Set VLAN offload configuration on an Ethernet device
+ * Enable/Disable Extended VLAN by an Ethernet device, This is a register setup
+ * available on some Intel NIC, not but all, please check the data sheet for
+ * availability.
+ * Enable/Disable VLAN Strip can be done on rx queue for certain NIC, but here
+ * the configuration is applied on the port level.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param offload_mask
+ *   The VLAN Offload bit mask can be mixed use with "OR"
+ *       ETH_VLAN_STRIP_OFFLOAD
+ *       ETH_VLAN_FILTER_OFFLOAD
+ *       ETH_VLAN_EXTEND_OFFLOAD
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOSUP) if hardware-assisted VLAN filtering not configured.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask);
+
+/**
+ * Read VLAN Offload configuration from an Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (>0) if successful. Bit mask to indicate
+ *       ETH_VLAN_STRIP_OFFLOAD
+ *       ETH_VLAN_FILTER_OFFLOAD
+ *       ETH_VLAN_EXTEND_OFFLOAD
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_get_vlan_offload(uint8_t port_id);
+
+/**
+ * Set port based TX VLAN insersion on or off.
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param pvid
+ *  Port based TX VLAN identifier togeth with user priority.
+ * @param on
+ *  Turn on or off the port based TX VLAN insertion.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - negative if failed.
+ */
+int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on);
+
+/**
+ *
+ * Retrieve a burst of input packets from a receive queue of an Ethernet
+ * device. The retrieved packets are stored in *rte_mbuf* structures whose
+ * pointers are supplied in the *rx_pkts* array.
+ *
+ * The rte_eth_rx_burst() function loops, parsing the RX ring of the
+ * receive queue, up to *nb_pkts* packets, and for each completed RX
+ * descriptor in the ring, it performs the following operations:
+ *
+ * - Initialize the *rte_mbuf* data structure associated with the
+ *   RX descriptor according to the information provided by the NIC into
+ *   that RX descriptor.
+ *
+ * - Store the *rte_mbuf* data structure into the next entry of the
+ *   *rx_pkts* array.
+ *
+ * - Replenish the RX descriptor with a new *rte_mbuf* buffer
+ *   allocated from the memory pool associated with the receive queue at
+ *   initialization time.
+ *
+ * When retrieving an input packet that was scattered by the controller
+ * into multiple receive descriptors, the rte_eth_rx_burst() function
+ * appends the associated *rte_mbuf* buffers to the first buffer of the
+ * packet.
+ *
+ * The rte_eth_rx_burst() function returns the number of packets
+ * actually retrieved, which is the number of *rte_mbuf* data structures
+ * effectively supplied into the *rx_pkts* array.
+ * A return value equal to *nb_pkts* indicates that the RX queue contained
+ * at least *rx_pkts* packets, and this is likely to signify that other
+ * received packets remain in the input queue. Applications implementing
+ * a "retrieve as much received packets as possible" policy can check this
+ * specific case and keep invoking the rte_eth_rx_burst() function until
+ * a value less than *nb_pkts* is returned.
+ *
+ * This receive method has the following advantages:
+ *
+ * - It allows a run-to-completion network stack engine to retrieve and
+ *   to immediately process received packets in a fast burst-oriented
+ *   approach, avoiding the overhead of unnecessary intermediate packet
+ *   queue/dequeue operations.
+ *
+ * - Conversely, it also allows an asynchronous-oriented processing
+ *   method to retrieve bursts of received packets and to immediately
+ *   queue them for further parallel processing by another logical core,
+ *   for instance. However, instead of having received packets being
+ *   individually queued by the driver, this approach allows the invoker
+ *   of the rte_eth_rx_burst() function to queue a burst of retrieved
+ *   packets at a time and therefore dramatically reduce the cost of
+ *   enqueue/dequeue operations per packet.
+ *
+ * - It allows the rte_eth_rx_burst() function of the driver to take
+ *   advantage of burst-oriented hardware features (CPU cache,
+ *   prefetch instructions, and so on) to minimize the number of CPU
+ *   cycles per packet.
+ *
+ * To summarize, the proposed receive API enables many
+ * burst-oriented optimizations in both synchronous and asynchronous
+ * packet processing environments with no overhead in both cases.
+ *
+ * The rte_eth_rx_burst() function does not provide any error
+ * notification to avoid the corresponding overhead. As a hint, the
+ * upper-level application might check the status of the device link once
+ * being systematically returned a 0 value for a given number of tries.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param rx_pkts
+ *   The address of an array of pointers to *rte_mbuf* structures that
+ *   must be large enough to store *nb_pkts* pointers in it.
+ * @param nb_pkts
+ *   The maximum number of packets to retrieve.
+ * @return
+ *   The number of packets actually retrieved, which is the number
+ *   of pointers to *rte_mbuf* structures effectively supplied to the
+ *   *rx_pkts* array.
+ */
+static inline uint16_t
+rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
+		 struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
+
+	if (queue_id >= dev->data->nb_rx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+		return 0;
+	}
+#endif
+	int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
+			rx_pkts, nb_pkts);
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
+
+	if (unlikely(cb != NULL)) {
+		do {
+			nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx,
+						nb_pkts, cb->param);
+			cb = cb->next;
+		} while (cb != NULL);
+	}
+#endif
+
+	return nb_rx;
+}
+
+/**
+ * Get the number of used descriptors in a specific queue
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param queue_id
+ *  The queue id on the specific port.
+ * @return
+ *  The number of used descriptors in the specific queue, or:
+ *     (-EINVAL) if *port_id* is invalid
+ *     (-ENOTSUP) if the device does not support this function
+ */
+static inline int
+rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP);
+        return (*dev->dev_ops->rx_queue_count)(dev, queue_id);
+}
+
+/**
+ * Check if the DD bit of the specific RX descriptor in the queue has been set
+ *
+ * @param port_id
+ *  The port identifier of the Ethernet device.
+ * @param queue_id
+ *  The queue id on the specific port.
+ * @param offset
+ *  The offset of the descriptor ID from tail.
+ * @return
+ *  - (1) if the specific DD bit is set.
+ *  - (0) if the specific DD bit is not set.
+ *  - (-ENODEV) if *port_id* invalid.
+ *  - (-ENOTSUP) if the device does not support this function
+ */
+static inline int
+rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_done, -ENOTSUP);
+	return (*dev->dev_ops->rx_descriptor_done)( \
+		dev->data->rx_queues[queue_id], offset);
+}
+
+/**
+ * Send a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_burst() function is invoked to transmit output packets
+ * on the output queue *queue_id* of the Ethernet device designated by its
+ * *port_id*.
+ * The *nb_pkts* parameter is the number of packets to send which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures.
+ * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets,
+ * up to the number of transmit descriptors available in the TX ring of the
+ * transmit queue.
+ * For each packet to send, the rte_eth_tx_burst() function performs
+ * the following operations:
+ *
+ * - Pick up the next available descriptor in the transmit ring.
+ *
+ * - Free the network buffer previously sent with that descriptor, if any.
+ *
+ * - Initialize the transmit descriptor with the information provided
+ *   in the *rte_mbuf data structure.
+ *
+ * In the case of a segmented packet composed of a list of *rte_mbuf* buffers,
+ * the rte_eth_tx_burst() function uses several transmit descriptors
+ * of the ring.
+ *
+ * The rte_eth_tx_burst() function returns the number of packets it
+ * actually sent. A return value equal to *nb_pkts* means that all packets
+ * have been sent, and this is likely to signify that other output packets
+ * could be immediately transmitted again. Applications that implement a
+ * "send as many packets to transmit as possible" policy can check this
+ * specific case and keep invoking the rte_eth_tx_burst() function until
+ * a value less than *nb_pkts* is returned.
+ *
+ * It is the responsibility of the rte_eth_tx_burst() function to
+ * transparently free the memory buffers of packets previously sent.
+ * This feature is driven by the *tx_free_thresh* value supplied to the
+ * rte_eth_dev_configure() function at device configuration time.
+ * When the number of free TX descriptors drops below this threshold, the
+ * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf*  buffers
+ * of those packets whose transmission was effectively completed.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to transmit.
+ * @return
+ *   The number of output packets actually stored in transmit descriptors of
+ *   the transmit ring. The return value can be less than the value of the
+ *   *tx_pkts* parameter when the transmit ring is full or has been filled up.
+ */
+static inline uint16_t
+rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
+		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);
+
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+		return 0;
+	}
+#endif
+
+#ifdef RTE_ETHDEV_RXTX_CALLBACKS
+	struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
+
+	if (unlikely(cb != NULL)) {
+		do {
+			nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts,
+					cb->param);
+			cb = cb->next;
+		} while (cb != NULL);
+	}
+#endif
+
+	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
+}
+
+typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
+		void *userdata);
+
+/**
+ * Structure used to buffer packets for future TX
+ * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush
+ */
+struct rte_eth_dev_tx_buffer {
+	buffer_tx_error_fn error_callback;
+	void *error_userdata;
+	uint16_t size;           /**< Size of buffer for buffered tx */
+	uint16_t length;         /**< Number of packets in the array */
+	struct rte_mbuf *pkts[];
+	/**< Pending packets to be sent on explicit flush or when full */
+};
+
+/**
+ * Calculate the size of the tx buffer.
+ *
+ * @param sz
+ *   Number of stored packets.
+ */
+#define RTE_ETH_TX_BUFFER_SIZE(sz) \
+	(sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf *))
+
+/**
+ * Initialize default values for buffered transmitting
+ *
+ * @param buffer
+ *   Tx buffer to be initialized.
+ * @param size
+ *   Buffer size
+ * @return
+ *   0 if no error
+ */
+int
+rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size);
+
+/**
+ * Send any packets queued up for transmission on a port and HW queue
+ *
+ * This causes an explicit flush of packets previously buffered via the
+ * rte_eth_tx_buffer() function. It returns the number of packets successfully
+ * sent to the NIC, and calls the error callback for any unsent packets. Unless
+ * explicitly set up otherwise, the default callback simply frees the unsent
+ * packets back to the owning mempool.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param buffer
+ *   Buffer of packets to be transmit.
+ * @return
+ *   The number of packets successfully sent to the Ethernet device. The error
+ *   callback is called for any packets which could not be sent.
+ */
+static inline uint16_t
+rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_dev_tx_buffer *buffer)
+{
+	uint16_t sent;
+	uint16_t to_send = buffer->length;
+
+	if (to_send == 0)
+		return 0;
+
+	sent = rte_eth_tx_burst(port_id, queue_id, buffer->pkts, to_send);
+
+	buffer->length = 0;
+
+	/* All packets sent, or to be dealt with by callback below */
+	if (unlikely(sent != to_send))
+		buffer->error_callback(&buffer->pkts[sent], to_send - sent,
+				buffer->error_userdata);
+
+	return sent;
+}
+
+/**
+ * Buffer a single packet for future transmission on a port and queue
+ *
+ * This function takes a single mbuf/packet and buffers it for later
+ * transmission on the particular port and queue specified. Once the buffer is
+ * full of packets, an attempt will be made to transmit all the buffered
+ * packets. In case of error, where not all packets can be transmitted, a
+ * callback is called with the unsent packets as a parameter. If no callback
+ * is explicitly set up, the unsent packets are just freed back to the owning
+ * mempool. The function returns the number of packets actually sent i.e.
+ * 0 if no buffer flush occurred, otherwise the number of packets successfully
+ * flushed
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param buffer
+ *   Buffer used to collect packets to be sent.
+ * @param tx_pkt
+ *   Pointer to the packet mbuf to be sent.
+ * @return
+ *   0 = packet has been buffered for later transmission
+ *   N > 0 = packet has been buffered, and the buffer was subsequently flushed,
+ *     causing N packets to be sent, and the error callback to be called for
+ *     the rest.
+ */
+static inline uint16_t __attribute__((always_inline))
+rte_eth_tx_buffer(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt)
+{
+	buffer->pkts[buffer->length++] = tx_pkt;
+	if (buffer->length < buffer->size)
+		return 0;
+
+	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
+}
+
+/**
+ * Configure a callback for buffered packets which cannot be sent
+ *
+ * Register a specific callback to be called when an attempt is made to send
+ * all packets buffered on an ethernet port, but not all packets can
+ * successfully be sent. The callback registered here will be called only
+ * from calls to rte_eth_tx_buffer() and rte_eth_tx_buffer_flush() APIs.
+ * The default callback configured for each queue by default just frees the
+ * packets back to the calling mempool. If additional behaviour is required,
+ * for example, to count dropped packets, or to retry transmission of packets
+ * which cannot be sent, this function should be used to register a suitable
+ * callback function to implement the desired behaviour.
+ * The example callback "rte_eth_count_unsent_packet_callback()" is also
+ * provided as reference.
+ *
+ * @param buffer
+ *   The port identifier of the Ethernet device.
+ * @param callback
+ *   The function to be used as the callback.
+ * @param userdata
+ *   Arbitrary parameter to be passed to the callback function
+ * @return
+ *   0 on success, or -1 on error with rte_errno set appropriately
+ */
+int
+rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
+		buffer_tx_error_fn callback, void *userdata);
+
+/**
+ * Callback function for silently dropping unsent buffered packets.
+ *
+ * This function can be passed to rte_eth_tx_buffer_set_err_callback() to
+ * adjust the default behavior when buffered packets cannot be sent. This
+ * function drops any unsent packets silently and is used by tx buffered
+ * operations as default behavior.
+ *
+ * NOTE: this function should not be called directly, instead it should be used
+ *       as a callback for packet buffering.
+ *
+ * NOTE: when configuring this function as a callback with
+ *       rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter
+ *       should point to an uint64_t value.
+ *
+ * @param pkts
+ *   The previously buffered packets which could not be sent
+ * @param unsent
+ *   The number of unsent packets in the pkts array
+ * @param userdata
+ *   Not used
+ */
+void
+rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
+		void *userdata);
+
+/**
+ * Callback function for tracking unsent buffered packets.
+ *
+ * This function can be passed to rte_eth_tx_buffer_set_err_callback() to
+ * adjust the default behavior when buffered packets cannot be sent. This
+ * function drops any unsent packets, but also updates a user-supplied counter
+ * to track the overall number of packets dropped. The counter should be an
+ * uint64_t variable.
+ *
+ * NOTE: this function should not be called directly, instead it should be used
+ *       as a callback for packet buffering.
+ *
+ * NOTE: when configuring this function as a callback with
+ *       rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter
+ *       should point to an uint64_t value.
+ *
+ * @param pkts
+ *   The previously buffered packets which could not be sent
+ * @param unsent
+ *   The number of unsent packets in the pkts array
+ * @param userdata
+ *   Pointer to an uint64_t value, which will be incremented by unsent
+ */
+void
+rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
+		void *userdata);
+
+/**
+ * The eth device event type for interrupt, and maybe others in the future.
+ */
+enum rte_eth_event_type {
+	RTE_ETH_EVENT_UNKNOWN,  /**< unknown event type */
+	RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */
+	RTE_ETH_EVENT_QUEUE_STATE,
+				/**< queue state event (enabled/disabled) */
+	RTE_ETH_EVENT_INTR_RESET,
+			/**< reset interrupt event, sent to VF on PF reset */
+	RTE_ETH_EVENT_MAX       /**< max value of this enum */
+};
+
+typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \
+		enum rte_eth_event_type event, void *cb_arg);
+/**< user application callback to be registered for interrupts */
+
+
+
+/**
+ * Register a callback function for specific port id.
+ *
+ * @param port_id
+ *  Port id.
+ * @param event
+ *  Event interested.
+ * @param cb_fn
+ *  User supplied callback function to be called.
+ * @param cb_arg
+ *  Pointer to the parameters for the registered callback.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_eth_dev_callback_register(uint8_t port_id,
+			enum rte_eth_event_type event,
+		rte_eth_dev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * Unregister a callback function for specific port id.
+ *
+ * @param port_id
+ *  Port id.
+ * @param event
+ *  Event interested.
+ * @param cb_fn
+ *  User supplied callback function to be called.
+ * @param cb_arg
+ *  Pointer to the parameters for the registered callback. -1 means to
+ *  remove all for the same callback address and same event.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+int rte_eth_dev_callback_unregister(uint8_t port_id,
+			enum rte_eth_event_type event,
+		rte_eth_dev_cb_fn cb_fn, void *cb_arg);
+
+/**
+ * @internal Executes all the user application registered callbacks for
+ * the specific device. It is for DPDK internal user only. User
+ * application should not call it directly.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ * @param event
+ *  Eth device interrupt event type.
+ *
+ * @return
+ *  void
+ */
+void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
+				enum rte_eth_event_type event);
+
+/**
+ * When there is no rx packet coming in Rx Queue for a long time, we can
+ * sleep lcore related to RX Queue for power saving, and enable rx interrupt
+ * to be triggered when rx packect arrives.
+ *
+ * The rte_eth_dev_rx_intr_enable() function enables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id);
+
+/**
+ * When lcore wakes up from rx interrupt indicating packet coming, disable rx
+ * interrupt and returns to polling mode.
+ *
+ * The rte_eth_dev_rx_intr_disable() function disables rx queue
+ * interrupt on specific rx queue of a port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id);
+
+/**
+ * RX Interrupt control per port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
+
+/**
+ * RX Interrupt control per queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param epfd
+ *   Epoll instance fd which the intr vector associated to.
+ *   Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance.
+ * @param op
+ *   The operation be performed for the vector.
+ *   Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}.
+ * @param data
+ *   User raw data.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+			      int epfd, int op, void *data);
+
+/**
+ * Turn on the LED on the Ethernet device.
+ * This function turns on the LED on the Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int  rte_eth_led_on(uint8_t port_id);
+
+/**
+ * Turn off the LED on the Ethernet device.
+ * This function turns off the LED on the Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if underlying hardware OR driver doesn't support
+ *     that operation.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int  rte_eth_led_off(uint8_t port_id);
+
+/**
+ * Get current status of the Ethernet link flow control for Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fc_conf
+ *   The pointer to the structure where to store the flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow control.
+ *   - (-ENODEV)  if *port_id* invalid.
+ */
+int rte_eth_dev_flow_ctrl_get(uint8_t port_id,
+			      struct rte_eth_fc_conf *fc_conf);
+
+/**
+ * Configure the Ethernet link flow control for Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fc_conf
+ *   The pointer to the structure of the flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow control mode.
+ *   - (-ENODEV)  if *port_id* invalid.
+ *   - (-EINVAL)  if bad parameter
+ *   - (-EIO)     if flow control setup failure
+ */
+int rte_eth_dev_flow_ctrl_set(uint8_t port_id,
+			      struct rte_eth_fc_conf *fc_conf);
+
+/**
+ * Configure the Ethernet priority flow control under DCB environment
+ * for Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param pfc_conf
+ * The pointer to the structure of the priority flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support priority flow control mode.
+ *   - (-ENODEV)  if *port_id* invalid.
+ *   - (-EINVAL)  if bad parameter
+ *   - (-EIO)     if flow control setup failure
+ */
+int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id,
+				struct rte_eth_pfc_conf *pfc_conf);
+
+/**
+ * Add a MAC address to an internal array of addresses used to enable whitelist
+ * filtering to accept packets only if the destination MAC address matches.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   The MAC address to add.
+ * @param pool
+ *   VMDq pool index to associate address with (if VMDq is enabled). If VMDq is
+ *   not enabled, this should be set to 0.
+ * @return
+ *   - (0) if successfully added or *mac_addr" was already added.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port* is invalid.
+ *   - (-ENOSPC) if no more MAC addresses can be added.
+ *   - (-EINVAL) if MAC address is invalid.
+ */
+int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr,
+				uint32_t pool);
+
+/**
+ * Remove a MAC address from the internal array of addresses.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   MAC address to remove.
+ * @return
+ *   - (0) if successful, or *mac_addr* didn't exist.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EADDRINUSE) if attempting to remove the default MAC address
+ */
+int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr);
+
+/**
+ * Set the default MAC address.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param mac_addr
+ *   New default MAC address.
+ * @return
+ *   - (0) if successful, or *mac_addr* didn't exist.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port* invalid.
+ *   - (-EINVAL) if MAC address is invalid.
+ */
+int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr);
+
+
+/**
+ * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param reta_conf
+ *   RETA to update.
+ * @param reta_size
+ *   Redirection table size. The table size can be queried by
+ *   rte_eth_dev_info_get().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_rss_reta_update(uint8_t port,
+				struct rte_eth_rss_reta_entry64 *reta_conf,
+				uint16_t reta_size);
+
+ /**
+ * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param reta_conf
+ *   RETA to query.
+ * @param reta_size
+ *   Redirection table size. The table size can be queried by
+ *   rte_eth_dev_info_get().
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_rss_reta_query(uint8_t port,
+			       struct rte_eth_rss_reta_entry64 *reta_conf,
+			       uint16_t reta_size);
+
+ /**
+ * Updates unicast hash table for receiving packet with the given destination
+ * MAC address, and the packet is routed to all VFs for which the RX mode is
+ * accept packets that match the unicast hash table.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param addr
+ *   Unicast MAC address.
+ * @param on
+ *    1 - Set an unicast hash bit for receiving packets with the MAC address.
+ *    0 - Clear an unicast hash bit.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+  *  - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr,
+					uint8_t on);
+
+ /**
+ * Updates all unicast hash bitmaps for receiving packet with any Unicast
+ * Ethernet MAC addresses,the packet is routed to all VFs for which the RX
+ * mode is accept packets that match the unicast hash table.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param on
+ *    1 - Set all unicast hash bitmaps for receiving all the Ethernet
+ *         MAC addresses
+ *    0 - Clear all unicast hash bitmaps
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+  *  - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on);
+
+ /**
+ * Set RX L2 Filtering mode of a VF of an Ethernet device.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @param rx_mode
+ *    The RX mode mask, which  is one or more of  accepting Untagged Packets,
+ *    packets that match the PFUTA table, Broadcast and Multicast Promiscuous.
+ *    ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC,
+ *    ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used
+ *    in rx_mode.
+ * @param on
+ *    1 - Enable a VF RX mode.
+ *    0 - Disable a VF RX mode.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode,
+				uint8_t on);
+
+/**
+* Enable or disable a VF traffic transmit of the Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param on
+*    1 - Enable a VF traffic transmit.
+*    0 - Disable a VF traffic transmit.
+* @return
+*   - (0) if successful.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on);
+
+/**
+* Enable or disable a VF traffic receive of an Ethernet device.
+*
+* @param port
+*   The port identifier of the Ethernet device.
+* @param vf
+*   VF id.
+* @param on
+*    1 - Enable a VF traffic receive.
+*    0 - Disable a VF traffic receive.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on);
+
+/**
+* Enable/Disable hardware VF VLAN filtering by an Ethernet device of
+* received VLAN packets tagged with a given VLAN Tag Identifier.
+*
+* @param port id
+*   The port identifier of the Ethernet device.
+* @param vlan_id
+*   The VLAN Tag Identifier whose filtering must be enabled or disabled.
+* @param vf_mask
+*    Bitmap listing which VFs participate in the VLAN filtering.
+* @param vlan_on
+*    1 - Enable VFs VLAN filtering.
+*    0 - Disable VFs VLAN filtering.
+* @return
+*   - (0) if successful.
+*   - (-ENOTSUP) if hardware doesn't support.
+*   - (-ENODEV) if *port_id* invalid.
+*   - (-EINVAL) if bad parameter.
+*/
+int
+rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
+				uint64_t vf_mask,
+				uint8_t vlan_on);
+
+/**
+ * Set a traffic mirroring rule on an Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mirror_conf
+ *   The pointer to the traffic mirroring structure describing the mirroring rule.
+ *   The *rte_eth_vm_mirror_conf* structure includes the type of mirroring rule,
+ *   destination pool and the value of rule if enable vlan or pool mirroring.
+ *
+ * @param rule_id
+ *   The index of traffic mirroring rule, we support four separated rules.
+ * @param on
+ *   1 - Enable a mirroring rule.
+ *   0 - Disable a mirroring rule.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if the mr_conf information is not correct.
+ */
+int rte_eth_mirror_rule_set(uint8_t port_id,
+			struct rte_eth_mirror_conf *mirror_conf,
+			uint8_t rule_id,
+			uint8_t on);
+
+/**
+ * Reset a traffic mirroring rule on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rule_id
+ *   The index of traffic mirroring rule, we support four separated rules.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_mirror_rule_reset(uint8_t port_id,
+					 uint8_t rule_id);
+
+/**
+ * Set the rate limitation for a queue on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_idx
+ *   The queue id.
+ * @param tx_rate
+ *   The tx rate in Mbps. Allocated from the total port link speed.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+			uint16_t tx_rate);
+
+/**
+ * Set the rate limitation for a vf on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param vf
+ *   VF id.
+ * @param tx_rate
+ *   The tx rate allocated from the total link speed for this VF id.
+ * @param q_msk
+ *   The queue mask which need to set the rate.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this feature.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf,
+			uint16_t tx_rate, uint64_t q_msk);
+
+/**
+ * Initialize bypass logic. This function needs to be called before
+ * executing any other bypass API.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_init(uint8_t port);
+
+/**
+ * Return bypass state.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param state
+ *   The return bypass state.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_state_show(uint8_t port, uint32_t *state);
+
+/**
+ * Set bypass state
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param new_state
+ *   The current bypass state.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_state_set(uint8_t port, uint32_t *new_state);
+
+/**
+ * Return bypass state when given event occurs.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param event
+ *   The bypass event
+ *   - (1) Main power on (power button is pushed)
+ *   - (2) Auxiliary power on (power supply is being plugged)
+ *   - (3) Main power off (system shutdown and power supply is left plugged in)
+ *   - (4) Auxiliary power off (power supply is being unplugged)
+ *   - (5) Display or set the watchdog timer
+ * @param state
+ *   The bypass state when given event occurred.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_event_show(uint8_t port, uint32_t event, uint32_t *state);
+
+/**
+ * Set bypass state when given event occurs.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param event
+ *   The bypass event
+ *   - (1) Main power on (power button is pushed)
+ *   - (2) Auxiliary power on (power supply is being plugged)
+ *   - (3) Main power off (system shutdown and power supply is left plugged in)
+ *   - (4) Auxiliary power off (power supply is being unplugged)
+ *   - (5) Display or set the watchdog timer
+ * @param state
+ *   The assigned state when given event occurs.
+ *   - (1) Normal mode
+ *   - (2) Bypass mode
+ *   - (3) Isolate mode
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_event_store(uint8_t port, uint32_t event, uint32_t state);
+
+/**
+ * Set bypass watchdog timeout count.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param timeout
+ *   The timeout to be set.
+ *   - (0) 0 seconds (timer is off)
+ *   - (1) 1.5 seconds
+ *   - (2) 2 seconds
+ *   - (3) 3 seconds
+ *   - (4) 4 seconds
+ *   - (5) 8 seconds
+ *   - (6) 16 seconds
+ *   - (7) 32 seconds
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_wd_timeout_store(uint8_t port, uint32_t timeout);
+
+/**
+ * Get bypass firmware version.
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param ver
+ *   The firmware version
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_ver_show(uint8_t port, uint32_t *ver);
+
+/**
+ * Return bypass watchdog timeout in seconds
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @param wd_timeout
+ *   The return watchdog timeout. "0" represents timer expired
+ *   - (0) 0 seconds (timer is off)
+ *   - (1) 1.5 seconds
+ *   - (2) 2 seconds
+ *   - (3) 3 seconds
+ *   - (4) 4 seconds
+ *   - (5) 8 seconds
+ *   - (6) 16 seconds
+ *   - (7) 32 seconds
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout);
+
+/**
+ * Reset bypass watchdog timer
+ *
+ * @param port
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_bypass_wd_reset(uint8_t port);
+
+ /**
+ * Configuration of Receive Side Scaling hash computation of Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rss_conf
+ *   The new configuration to use for RSS hash computation on the port.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-EINVAL) if bad parameter.
+ */
+int rte_eth_dev_rss_hash_update(uint8_t port_id,
+				struct rte_eth_rss_conf *rss_conf);
+
+ /**
+ * Retrieve current configuration of Receive Side Scaling hash computation
+ * of Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rss_conf
+ *   Where to store the current RSS hash configuration of the Ethernet device.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support RSS.
+ */
+int
+rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
+			      struct rte_eth_rss_conf *rss_conf);
+
+ /**
+ * Add UDP tunneling port for a specific type of tunnel.
+ * The packets with this UDP port will be identified as this type of tunnel.
+ * Before enabling any offloading function for a tunnel, users can call this API
+ * to change or add more UDP port for the tunnel. So the offloading function
+ * can take effect on the packets with the sepcific UDP port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tunnel_udp
+ *   UDP tunneling configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_udp_tunnel_port_add(uint8_t port_id,
+				struct rte_eth_udp_tunnel *tunnel_udp);
+
+ /**
+ * Delete UDP tunneling port a specific type of tunnel.
+ * The packets with this UDP port will not be identified as this type of tunnel
+ * any more.
+ * Before enabling any offloading function for a tunnel, users can call this API
+ * to delete a UDP port for the tunnel. So the offloading function will not take
+ * effect on the packets with the sepcific UDP port.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tunnel_udp
+ *   UDP tunneling configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id,
+				   struct rte_eth_udp_tunnel *tunnel_udp);
+
+/**
+ * Check whether the filter type is supported on an Ethernet device.
+ * All the supported filter types are defined in 'rte_eth_ctrl.h'.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param filter_type
+ *   Filter type.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support this filter type.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type);
+
+/**
+ * Take operations to assigned filter type on an Ethernet device.
+ * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Type of operation.
+ * @param arg
+ *   A pointer to arguments defined specifically for the operation.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
+			enum rte_filter_op filter_op, void *arg);
+
+/**
+ * Get DCB information on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param dcb_info
+ *   dcb information.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ */
+int rte_eth_dev_get_dcb_info(uint8_t port_id,
+			     struct rte_eth_dcb_info *dcb_info);
+
+/**
+ * Add a callback to be called on packet RX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets received on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_rx_callback().
+ *
+ * Multiple functions are called in the order that they are added.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ *   The callback function
+ * @param user_param
+ *   A generic pointer parameter which will be passed to each invocation of the
+ *   callback function on this port and queue.
+ *
+ * @return
+ *   NULL on error.
+ *   On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_rx_callback_fn fn, void *user_param);
+
+/**
+ * Add a callback to be called on packet TX on a given port and queue.
+ *
+ * This API configures a function to be called for each burst of
+ * packets sent on a given NIC port queue. The return value is a pointer
+ * that can be used to later remove the callback using
+ * rte_eth_remove_tx_callback().
+ *
+ * Multiple functions are called in the order that they are added.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device on which the callback is to be added.
+ * @param fn
+ *   The callback function
+ * @param user_param
+ *   A generic pointer parameter which will be passed to each invocation of the
+ *   callback function on this port and queue.
+ *
+ * @return
+ *   NULL on error.
+ *   On success, a pointer value which can later be used to remove the callback.
+ */
+void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_tx_callback_fn fn, void *user_param);
+
+/**
+ * Remove an RX packet callback from a given port and queue.
+ *
+ * This function is used to removed callbacks that were added to a NIC port
+ * queue using rte_eth_add_rx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since the it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ *   callbacks are in flight e.g. if called from the thread doing RX/TX
+ *   on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ *   in-flight callbacks to complete.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ *   User supplied callback created via rte_eth_add_rx_callback().
+ *
+ * @return
+ *   - 0: Success. Callback was removed.
+ *   - -ENOTSUP: Callback support is not available.
+ *   - -EINVAL:  The port_id or the queue_id is out of range, or the callback
+ *               is NULL or not found for the port/queue.
+ */
+int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Remove a TX packet callback from a given port and queue.
+ *
+ * This function is used to removed callbacks that were added to a NIC port
+ * queue using rte_eth_add_tx_callback().
+ *
+ * Note: the callback is removed from the callback list but it isn't freed
+ * since the it may still be in use. The memory for the callback can be
+ * subsequently freed back by the application by calling rte_free():
+ *
+ * - Immediately - if the port is stopped, or the user knows that no
+ *   callbacks are in flight e.g. if called from the thread doing RX/TX
+ *   on that queue.
+ *
+ * - After a short delay - where the delay is sufficient to allow any
+ *   in-flight callbacks to complete.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The queue on the Ethernet device from which the callback is to be removed.
+ * @param user_cb
+ *   User supplied callback created via rte_eth_add_tx_callback().
+ *
+ * @return
+ *   - 0: Success. Callback was removed.
+ *   - -ENOTSUP: Callback support is not available.
+ *   - -EINVAL:  The port_id or the queue_id is out of range, or the callback
+ *               is NULL or not found for the port/queue.
+ */
+int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+		struct rte_eth_rxtx_callback *user_cb);
+
+/**
+ * Retrieve information about given port's RX queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The RX queue on the Ethernet device for which information
+ *   will be retrieved.
+ * @param qinfo
+ *   A pointer to a structure of type *rte_eth_rxq_info_info* to be filled with
+ *   the information of the Ethernet device.
+ *
+ * @return
+ *   - 0: Success
+ *   - -ENOTSUP: routine is not supported by the device PMD.
+ *   - -EINVAL:  The port_id or the queue_id is out of range.
+ */
+int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+	struct rte_eth_rxq_info *qinfo);
+
+/**
+ * Retrieve information about given port's TX queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The TX queue on the Ethernet device for which information
+ *   will be retrieved.
+ * @param qinfo
+ *   A pointer to a structure of type *rte_eth_txq_info_info* to be filled with
+ *   the information of the Ethernet device.
+ *
+ * @return
+ *   - 0: Success
+ *   - -ENOTSUP: routine is not supported by the device PMD.
+ *   - -EINVAL:  The port_id or the queue_id is out of range.
+ */
+int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+	struct rte_eth_txq_info *qinfo);
+
+/*
+ * Retrieve number of available registers for access
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (>=0) number of registers if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_get_reg_length(uint8_t port_id);
+
+/**
+ * Retrieve device registers and register attributes
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param info
+ *   The template includes buffer for register data and attribute to be filled.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info);
+
+/**
+ * Retrieve size of device EEPROM
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @return
+ *   - (>=0) EEPROM size if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_get_eeprom_length(uint8_t port_id);
+
+/**
+ * Retrieve EEPROM and EEPROM attribute
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param info
+ *   The template includes buffer for return EEPROM data and
+ *   EEPROM attributes to be filled.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info);
+
+/**
+ * Program EEPROM with provided data
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param info
+ *   The template includes EEPROM data for programming and
+ *   EEPROM attributes to be filled
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - others depends on the specific operations implementation.
+ */
+int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info);
+
+/**
+ * Set the list of multicast addresses to filter on an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mc_addr_set
+ *   The array of multicast addresses to set. Equal to NULL when the function
+ *   is invoked to flush the set of filtered addresses.
+ * @param nb_mc_addr
+ *   The number of multicast addresses in the *mc_addr_set* array. Equal to 0
+ *   when the function is invoked to flush the set of filtered addresses.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering.
+ *   - (-ENOSPC) if *port_id* has not enough multicast filtering resources.
+ */
+int rte_eth_dev_set_mc_addr_list(uint8_t port_id,
+				 struct ether_addr *mc_addr_set,
+				 uint32_t nb_mc_addr);
+
+/**
+ * Enable IEEE1588/802.1AS timestamping for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ *
+ * @return
+ *   - 0: Success.
+ *   - -ENODEV: The port ID is invalid.
+ *   - -ENOTSUP: The function is not supported by the Ethernet driver.
+ */
+int rte_eth_timesync_enable(uint8_t port_id);
+
+/**
+ * Disable IEEE1588/802.1AS timestamping for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ *
+ * @return
+ *   - 0: Success.
+ *   - -ENODEV: The port ID is invalid.
+ *   - -ENOTSUP: The function is not supported by the Ethernet driver.
+ */
+int rte_eth_timesync_disable(uint8_t port_id);
+
+/**
+ * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param timestamp
+ *   Pointer to the timestamp struct.
+ * @param flags
+ *   Device specific flags. Used to pass the RX timesync register index to
+ *   i40e. Unused in igb/ixgbe, pass 0 instead.
+ *
+ * @return
+ *   - 0: Success.
+ *   - -EINVAL: No timestamp is available.
+ *   - -ENODEV: The port ID is invalid.
+ *   - -ENOTSUP: The function is not supported by the Ethernet driver.
+ */
+int rte_eth_timesync_read_rx_timestamp(uint8_t port_id,
+		struct timespec *timestamp, uint32_t flags);
+
+/**
+ * Read an IEEE1588/802.1AS TX timestamp from an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param timestamp
+ *   Pointer to the timestamp struct.
+ *
+ * @return
+ *   - 0: Success.
+ *   - -EINVAL: No timestamp is available.
+ *   - -ENODEV: The port ID is invalid.
+ *   - -ENOTSUP: The function is not supported by the Ethernet driver.
+ */
+int rte_eth_timesync_read_tx_timestamp(uint8_t port_id,
+		struct timespec *timestamp);
+
+/**
+ * Adjust the timesync clock on an Ethernet device.
+ *
+ * This is usually used in conjunction with other Ethdev timesync functions to
+ * synchronize the device time using the IEEE1588/802.1AS protocol.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param delta
+ *   The adjustment in nanoseconds.
+ *
+ * @return
+ *   - 0: Success.
+ *   - -ENODEV: The port ID is invalid.
+ *   - -ENOTSUP: The function is not supported by the Ethernet driver.
+ */
+int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta);
+
+/**
+ * Read the time from the timesync clock on an Ethernet device.
+ *
+ * This is usually used in conjunction with other Ethdev timesync functions to
+ * synchronize the device time using the IEEE1588/802.1AS protocol.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param time
+ *   Pointer to the timespec struct that holds the time.
+ *
+ * @return
+ *   - 0: Success.
+ */
+int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time);
+
+/**
+ * Set the time of the timesync clock on an Ethernet device.
+ *
+ * This is usually used in conjunction with other Ethdev timesync functions to
+ * synchronize the device time using the IEEE1588/802.1AS protocol.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param time
+ *   Pointer to the timespec struct that holds the time.
+ *
+ * @return
+ *   - 0: Success.
+ *   - -EINVAL: No timestamp is available.
+ *   - -ENODEV: The port ID is invalid.
+ *   - -ENOTSUP: The function is not supported by the Ethernet driver.
+ */
+int rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *time);
+
+/**
+ * Copy pci device info to the Ethernet device data.
+ *
+ * @param eth_dev
+ * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * @param pci_dev
+ * The *pci_dev* pointer is the address of the *rte_pci_device* structure.
+ *
+ * @return
+ *   - 0 on success, negative on error
+ */
+void rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
+		struct rte_pci_device *pci_dev);
+
+/**
+ * Create memzone for HW rings.
+ * malloc can't be used as the physical address is needed.
+ * If the memzone is already created, then this function returns a ptr
+ * to the old one.
+ *
+ * @param eth_dev
+ *   The *eth_dev* pointer is the address of the *rte_eth_dev* structure
+ * @param name
+ *   The name of the memory zone
+ * @param queue_id
+ *   The index of the queue to add to name
+ * @param size
+ *   The sizeof of the memory area
+ * @param align
+ *   Alignment for resulting memzone. Must be a power of 2.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of NUMA.
+ */
+const struct rte_memzone *
+rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
+			 uint16_t queue_id, size_t size,
+			 unsigned align, int socket_id);
+
+/**
+ * Config l2 tunnel ether type of an Ethernet device for filtering specific
+ * tunnel packets by ether type.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param l2_tunnel
+ *   l2 tunnel configuration.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id,
+				    struct rte_eth_l2_tunnel_conf *l2_tunnel);
+
+/**
+ * Enable/disable l2 tunnel offload functions. Include,
+ * 1, The ability of parsing a type of l2 tunnel of an Ethernet device.
+ *    Filtering, forwarding and offloading this type of tunnel packets depend on
+ *    this ability.
+ * 2, Stripping the l2 tunnel tag.
+ * 3, Insertion of the l2 tunnel tag.
+ * 4, Forwarding the packets based on the l2 tunnel tag.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param l2_tunnel
+ *   l2 tunnel parameters.
+ * @param mask
+ *   Indicate the offload function.
+ * @param en
+ *   Enable or disable this function.
+ *
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if port identifier is invalid.
+ *   - (-ENOTSUP) if hardware doesn't support tunnel type.
+ */
+int
+rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
+				  struct rte_eth_l2_tunnel_conf *l2_tunnel,
+				  uint32_t mask,
+				  uint8_t en);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_H_ */
diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h
new file mode 100644
index 00000000..1d62d8e5
--- /dev/null
+++ b/lib/librte_ether/rte_ether.h
@@ -0,0 +1,416 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHER_H_
+#define _RTE_ETHER_H_
+
+/**
+ * @file
+ *
+ * Ethernet Helpers in RTE
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_memcpy.h>
+#include <rte_random.h>
+#include <rte_mbuf.h>
+#include <rte_byteorder.h>
+
+#define ETHER_ADDR_LEN  6 /**< Length of Ethernet address. */
+#define ETHER_TYPE_LEN  2 /**< Length of Ethernet type field. */
+#define ETHER_CRC_LEN   4 /**< Length of Ethernet CRC. */
+#define ETHER_HDR_LEN   \
+	(ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */
+#define ETHER_MIN_LEN   64    /**< Minimum frame len, including CRC. */
+#define ETHER_MAX_LEN   1518  /**< Maximum frame len, including CRC. */
+#define ETHER_MTU       \
+	(ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */
+
+#define ETHER_MAX_VLAN_FRAME_LEN \
+	(ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. */
+
+#define ETHER_MAX_JUMBO_FRAME_LEN \
+	0x3F00 /**< Maximum Jumbo frame length, including CRC. */
+
+#define ETHER_MAX_VLAN_ID  4095 /**< Maximum VLAN ID. */
+
+#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */
+
+/**
+ * Ethernet address:
+ * A universally administered address is uniquely assigned to a device by its
+ * manufacturer. The first three octets (in transmission order) contain the
+ * Organizationally Unique Identifier (OUI). The following three (MAC-48 and
+ * EUI-48) octets are assigned by that organization with the only constraint
+ * of uniqueness.
+ * A locally administered address is assigned to a device by a network
+ * administrator and does not contain OUIs.
+ * See http://standards.ieee.org/regauth/groupmac/tutorial.html
+ */
+struct ether_addr {
+	uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */
+} __attribute__((__packed__));
+
+#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */
+#define ETHER_GROUP_ADDR       0x01 /**< Multicast or broadcast Eth. address. */
+
+/**
+ * Check if two Ethernet addresses are the same.
+ *
+ * @param ea1
+ *  A pointer to the first ether_addr structure containing
+ *  the ethernet address.
+ * @param ea2
+ *  A pointer to the second ether_addr structure containing
+ *  the ethernet address.
+ *
+ * @return
+ *  True  (1) if the given two ethernet address are the same;
+ *  False (0) otherwise.
+ */
+static inline int is_same_ether_addr(const struct ether_addr *ea1,
+				     const struct ether_addr *ea2)
+{
+	int i;
+	for (i = 0; i < ETHER_ADDR_LEN; i++)
+		if (ea1->addr_bytes[i] != ea2->addr_bytes[i])
+			return 0;
+	return 1;
+}
+
+/**
+ * Check if an Ethernet address is filled with zeros.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is filled with zeros;
+ *   false (0) otherwise.
+ */
+static inline int is_zero_ether_addr(const struct ether_addr *ea)
+{
+	int i;
+	for (i = 0; i < ETHER_ADDR_LEN; i++)
+		if (ea->addr_bytes[i] != 0x00)
+			return 0;
+	return 1;
+}
+
+/**
+ * Check if an Ethernet address is a unicast address.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is a unicast address;
+ *   false (0) otherwise.
+ */
+static inline int is_unicast_ether_addr(const struct ether_addr *ea)
+{
+	return (ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0;
+}
+
+/**
+ * Check if an Ethernet address is a multicast address.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is a multicast address;
+ *   false (0) otherwise.
+ */
+static inline int is_multicast_ether_addr(const struct ether_addr *ea)
+{
+	return ea->addr_bytes[0] & ETHER_GROUP_ADDR;
+}
+
+/**
+ * Check if an Ethernet address is a broadcast address.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is a broadcast address;
+ *   false (0) otherwise.
+ */
+static inline int is_broadcast_ether_addr(const struct ether_addr *ea)
+{
+	const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea;
+
+	return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF &&
+		ea_words[2] == 0xFFFF);
+}
+
+/**
+ * Check if an Ethernet address is a universally assigned address.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is a universally assigned address;
+ *   false (0) otherwise.
+ */
+static inline int is_universal_ether_addr(const struct ether_addr *ea)
+{
+	return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0;
+}
+
+/**
+ * Check if an Ethernet address is a locally assigned address.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is a locally assigned address;
+ *   false (0) otherwise.
+ */
+static inline int is_local_admin_ether_addr(const struct ether_addr *ea)
+{
+	return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0;
+}
+
+/**
+ * Check if an Ethernet address is a valid address. Checks that the address is a
+ * unicast address and is not filled with zeros.
+ *
+ * @param ea
+ *   A pointer to a ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True  (1) if the given ethernet address is valid;
+ *   false (0) otherwise.
+ */
+static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
+{
+	return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea));
+}
+
+/**
+ * Generate a random Ethernet address that is locally administered
+ * and not multicast.
+ * @param addr
+ *   A pointer to Ethernet address.
+ */
+static inline void eth_random_addr(uint8_t *addr)
+{
+	uint64_t rand = rte_rand();
+	uint8_t *p = (uint8_t*)&rand;
+
+	rte_memcpy(addr, p, ETHER_ADDR_LEN);
+	addr[0] &= ~ETHER_GROUP_ADDR;       /* clear multicast bit */
+	addr[0] |= ETHER_LOCAL_ADMIN_ADDR;  /* set local assignment bit */
+}
+
+/**
+ * Fast copy an Ethernet address.
+ *
+ * @param ea_from
+ *   A pointer to a ether_addr structure holding the Ethernet address to copy.
+ * @param ea_to
+ *   A pointer to a ether_addr structure where to copy the Ethernet address.
+ */
+static inline void ether_addr_copy(const struct ether_addr *ea_from,
+				   struct ether_addr *ea_to)
+{
+#ifdef __INTEL_COMPILER
+	uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes);
+	uint16_t *to_words   = (uint16_t *)(ea_to->addr_bytes);
+
+	to_words[0] = from_words[0];
+	to_words[1] = from_words[1];
+	to_words[2] = from_words[2];
+#else
+	/*
+	 * Use the common way, because of a strange gcc warning.
+	 */
+	*ea_to = *ea_from;
+#endif
+}
+
+#define ETHER_ADDR_FMT_SIZE         18
+/**
+ * Format 48bits Ethernet address in pattern xx:xx:xx:xx:xx:xx.
+ *
+ * @param buf
+ *   A pointer to buffer contains the formatted MAC address.
+ * @param size
+ *   The format buffer size.
+ * @param eth_addr
+ *   A pointer to a ether_addr structure.
+ */
+static inline void
+ether_format_addr(char *buf, uint16_t size,
+		  const struct ether_addr *eth_addr)
+{
+	snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X",
+		 eth_addr->addr_bytes[0],
+		 eth_addr->addr_bytes[1],
+		 eth_addr->addr_bytes[2],
+		 eth_addr->addr_bytes[3],
+		 eth_addr->addr_bytes[4],
+		 eth_addr->addr_bytes[5]);
+}
+
+/**
+ * Ethernet header: Contains the destination address, source address
+ * and frame type.
+ */
+struct ether_hdr {
+	struct ether_addr d_addr; /**< Destination address. */
+	struct ether_addr s_addr; /**< Source address. */
+	uint16_t ether_type;      /**< Frame type. */
+} __attribute__((__packed__));
+
+/**
+ * Ethernet VLAN Header.
+ * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type
+ * of the encapsulated frame.
+ */
+struct vlan_hdr {
+	uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */
+	uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */
+} __attribute__((__packed__));
+
+/**
+ * VXLAN protocol header.
+ * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and
+ * Reserved fields (24 bits and 8 bits)
+ */
+struct vxlan_hdr {
+	uint32_t vx_flags; /**< flag (8) + Reserved (24). */
+	uint32_t vx_vni;   /**< VNI (24) + Reserved (8). */
+} __attribute__((__packed__));
+
+/* Ethernet frame types */
+#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */
+#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */
+#define ETHER_TYPE_ARP  0x0806 /**< Arp Protocol. */
+#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
+#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
+#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
+#define ETHER_TYPE_TEB  0x6558 /**< Transparent Ethernet Bridging. */
+
+#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+/**< VXLAN tunnel header length. */
+
+/**
+ * Extract VLAN tag information into mbuf
+ *
+ * Software version of VLAN stripping
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   - 0: Success
+ *   - 1: not a vlan packet
+ */
+static inline int rte_vlan_strip(struct rte_mbuf *m)
+{
+	struct ether_hdr *eh
+		 = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN))
+		return -1;
+
+	struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
+	m->ol_flags |= PKT_RX_VLAN_PKT;
+	m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
+
+	/* Copy ether header over rather than moving whole packet */
+	memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)),
+		eh, 2 * ETHER_ADDR_LEN);
+
+	return 0;
+}
+
+/**
+ * Insert VLAN tag into mbuf.
+ *
+ * Software version of VLAN unstripping
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   - 0: On success
+ *   -EPERM: mbuf is is shared overwriting would be unsafe
+ *   -ENOSPC: not enough headroom in mbuf
+ */
+static inline int rte_vlan_insert(struct rte_mbuf **m)
+{
+	struct ether_hdr *oh, *nh;
+	struct vlan_hdr *vh;
+
+	/* Can't insert header if mbuf is shared */
+	if (rte_mbuf_refcnt_read(*m) > 1) {
+		struct rte_mbuf *copy;
+
+		copy = rte_pktmbuf_clone(*m, (*m)->pool);
+		if (unlikely(copy == NULL))
+			return -ENOMEM;
+		rte_pktmbuf_free(*m);
+		*m = copy;
+	}
+
+	oh = rte_pktmbuf_mtod(*m, struct ether_hdr *);
+	nh = (struct ether_hdr *)
+		rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr));
+	if (nh == NULL)
+		return -ENOSPC;
+
+	memmove(nh, oh, 2 * ETHER_ADDR_LEN);
+	nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+
+	vh = (struct vlan_hdr *) (nh + 1);
+	vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci);
+
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHER_H_ */
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
new file mode 100644
index 00000000..214ecc73
--- /dev/null
+++ b/lib/librte_ether/rte_ether_version.map
@@ -0,0 +1,134 @@
+DPDK_2.2 {
+	global:
+
+	_rte_eth_dev_callback_process;
+	rte_eth_add_rx_callback;
+	rte_eth_add_tx_callback;
+	rte_eth_allmulticast_disable;
+	rte_eth_allmulticast_enable;
+	rte_eth_allmulticast_get;
+	rte_eth_copy_pci_info;
+	rte_eth_dev_allocate;
+	rte_eth_dev_allocated;
+	rte_eth_dev_attach;
+	rte_eth_dev_bypass_event_show;
+	rte_eth_dev_bypass_event_store;
+	rte_eth_dev_bypass_init;
+	rte_eth_dev_bypass_state_set;
+	rte_eth_dev_bypass_state_show;
+	rte_eth_dev_bypass_ver_show;
+	rte_eth_dev_bypass_wd_reset;
+	rte_eth_dev_bypass_wd_timeout_show;
+	rte_eth_dev_callback_process;
+	rte_eth_dev_callback_register;
+	rte_eth_dev_callback_unregister;
+	rte_eth_dev_close;
+	rte_eth_dev_configure;
+	rte_eth_dev_count;
+	rte_eth_dev_default_mac_addr_set;
+	rte_eth_dev_detach;
+	rte_eth_dev_filter_ctrl;
+	rte_eth_dev_filter_supported;
+	rte_eth_dev_flow_ctrl_get;
+	rte_eth_dev_flow_ctrl_set;
+	rte_eth_dev_get_dcb_info;
+	rte_eth_dev_get_eeprom;
+	rte_eth_dev_get_eeprom_length;
+	rte_eth_dev_get_mtu;
+	rte_eth_dev_get_reg_info;
+	rte_eth_dev_get_reg_length;
+	rte_eth_dev_get_vlan_offload;
+	rte_eth_devices;
+	rte_eth_dev_info_get;
+	rte_eth_dev_is_valid_port;
+	rte_eth_dev_mac_addr_add;
+	rte_eth_dev_mac_addr_remove;
+	rte_eth_dev_priority_flow_ctrl_set;
+	rte_eth_dev_release_port;
+	rte_eth_dev_rss_hash_conf_get;
+	rte_eth_dev_rss_hash_update;
+	rte_eth_dev_rss_reta_query;
+	rte_eth_dev_rss_reta_update;
+	rte_eth_dev_rx_intr_ctl;
+	rte_eth_dev_rx_intr_ctl_q;
+	rte_eth_dev_rx_intr_disable;
+	rte_eth_dev_rx_intr_enable;
+	rte_eth_dev_rx_queue_start;
+	rte_eth_dev_rx_queue_stop;
+	rte_eth_dev_set_eeprom;
+	rte_eth_dev_set_link_down;
+	rte_eth_dev_set_link_up;
+	rte_eth_dev_set_mc_addr_list;
+	rte_eth_dev_set_mtu;
+	rte_eth_dev_set_rx_queue_stats_mapping;
+	rte_eth_dev_set_tx_queue_stats_mapping;
+	rte_eth_dev_set_vf_rx;
+	rte_eth_dev_set_vf_rxmode;
+	rte_eth_dev_set_vf_tx;
+	rte_eth_dev_set_vf_vlan_filter;
+	rte_eth_dev_set_vlan_ether_type;
+	rte_eth_dev_set_vlan_offload;
+	rte_eth_dev_set_vlan_pvid;
+	rte_eth_dev_set_vlan_strip_on_queue;
+	rte_eth_dev_socket_id;
+	rte_eth_dev_start;
+	rte_eth_dev_stop;
+	rte_eth_dev_tx_queue_start;
+	rte_eth_dev_tx_queue_stop;
+	rte_eth_dev_uc_all_hash_table_set;
+	rte_eth_dev_uc_hash_table_set;
+	rte_eth_dev_vlan_filter;
+	rte_eth_dev_wd_timeout_store;
+	rte_eth_dma_zone_reserve;
+	rte_eth_driver_register;
+	rte_eth_led_off;
+	rte_eth_led_on;
+	rte_eth_link;
+	rte_eth_link_get;
+	rte_eth_link_get_nowait;
+	rte_eth_macaddr_get;
+	rte_eth_mirror_rule_reset;
+	rte_eth_mirror_rule_set;
+	rte_eth_promiscuous_disable;
+	rte_eth_promiscuous_enable;
+	rte_eth_promiscuous_get;
+	rte_eth_remove_rx_callback;
+	rte_eth_remove_tx_callback;
+	rte_eth_rx_queue_info_get;
+	rte_eth_rx_queue_setup;
+	rte_eth_set_queue_rate_limit;
+	rte_eth_set_vf_rate_limit;
+	rte_eth_stats;
+	rte_eth_stats_get;
+	rte_eth_stats_reset;
+	rte_eth_timesync_adjust_time;
+	rte_eth_timesync_disable;
+	rte_eth_timesync_enable;
+	rte_eth_timesync_read_rx_timestamp;
+	rte_eth_timesync_read_time;
+	rte_eth_timesync_read_tx_timestamp;
+	rte_eth_timesync_write_time;
+	rte_eth_tx_queue_info_get;
+	rte_eth_tx_queue_setup;
+	rte_eth_xstats_get;
+	rte_eth_xstats_reset;
+
+	local: *;
+};
+
+DPDK_16.04 {
+	global:
+
+	rte_eth_dev_get_supported_ptypes;
+	rte_eth_dev_l2_tunnel_eth_type_conf;
+	rte_eth_dev_l2_tunnel_offload_set;
+	rte_eth_dev_set_vlan_ether_type;
+	rte_eth_dev_udp_tunnel_port_add;
+	rte_eth_dev_udp_tunnel_port_delete;
+	rte_eth_speed_bitflag;
+	rte_eth_tx_buffer_count_callback;
+	rte_eth_tx_buffer_drop_callback;
+	rte_eth_tx_buffer_init;
+	rte_eth_tx_buffer_set_err_callback;
+
+} DPDK_2.2;
diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
new file mode 100644
index 00000000..bb1ea990
--- /dev/null
+++ b/lib/librte_hash/Makefile
@@ -0,0 +1,61 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_hash.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_hash_version.map
+
+LIBABIVER := 2
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
+ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
+endif
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
+
+# this lib needs eal and ring
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HASH) += lib/librte_eal lib/librte_ring
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_hash/rte_cmp_arm64.h b/lib/librte_hash/rte_cmp_arm64.h
new file mode 100644
index 00000000..6fd937b1
--- /dev/null
+++ b/lib/librte_hash/rte_cmp_arm64.h
@@ -0,0 +1,114 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Cavium networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
+static int
+rte_hash_k16_cmp_eq(const void *key1, const void *key2,
+		    size_t key_len __rte_unused)
+{
+	uint64_t x0, x1, y0, y1;
+
+	asm volatile(
+		"ldp %x[x1], %x[x0], [%x[p1]]"
+		: [x1]"=r"(x1), [x0]"=r"(x0)
+		: [p1]"r"(key1)
+		);
+	asm volatile(
+		"ldp %x[y1], %x[y0], [%x[p2]]"
+		: [y1]"=r"(y1), [y0]"=r"(y0)
+		: [p2]"r"(key2)
+		);
+	x0 ^= y0;
+	x1 ^= y1;
+	return !(x0 == 0 && x1 == 0);
+}
+
+static int
+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len);
+}
+
+static int
+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+static int
+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+static int
+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+static int
+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+static int
+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 96,
+				(const char *) key2 + 96, key_len);
+}
+
+static int
+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k64_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h
new file mode 100644
index 00000000..e8c484d6
--- /dev/null
+++ b/lib/librte_hash/rte_cmp_x86.h
@@ -0,0 +1,109 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
+static int
+rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
+	const __m128i k2 = _mm_loadu_si128((const __m128i *) key2);
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_1
+	const __m128i x = _mm_xor_si128(k1, k2);
+
+	return !_mm_test_all_zeros(x, x);
+#else
+	const __m128i x = _mm_cmpeq_epi32(k1, k2);
+
+	return _mm_movemask_epi8(x) != 0xffff;
+#endif
+}
+
+static int
+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len);
+}
+
+static int
+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+static int
+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+static int
+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+static int
+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+static int
+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 96,
+				(const char *) key2 + 96, key_len);
+}
+
+static int
+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k64_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
new file mode 100644
index 00000000..7dd6334e
--- /dev/null
+++ b/lib/librte_hash/rte_crc_arm64.h
@@ -0,0 +1,215 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Cavium networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRC_ARM64_H_
+#define _RTE_CRC_ARM64_H_
+
+/**
+ * @file
+ *
+ * RTE CRC arm64 Hash
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+
+static inline uint32_t
+crc32c_arm64_u8(uint8_t data, uint32_t init_val)
+{
+	asm(".arch armv8-a+crc");
+	__asm__ volatile(
+			"crc32cb %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u16(uint16_t data, uint32_t init_val)
+{
+	asm(".arch armv8-a+crc");
+	__asm__ volatile(
+			"crc32ch %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u32(uint32_t data, uint32_t init_val)
+{
+	asm(".arch armv8-a+crc");
+	__asm__ volatile(
+			"crc32cw %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_arm64_u64(uint64_t data, uint32_t init_val)
+{
+	asm(".arch armv8-a+crc");
+	__asm__ volatile(
+			"crc32cx %w[crc], %w[crc], %x[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+/**
+ * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param alg
+ *   An OR of following flags:
+ *   - (CRC32_SW) Don't use arm64 crc intrinsics
+ *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
+ *
+ */
+static inline void
+rte_hash_crc_set_alg(uint8_t alg)
+{
+	switch (alg) {
+	case CRC32_ARM64:
+		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
+			alg = CRC32_SW;
+	case CRC32_SW:
+		crc32_alg = alg;
+	default:
+		break;
+	}
+}
+
+/* Setting the best available algorithm */
+static inline void __attribute__((constructor))
+rte_hash_crc_init_alg(void)
+{
+	rte_hash_crc_set_alg(CRC32_ARM64);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 1 byte value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u8(data, init_val);
+
+	return crc32c_1byte(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 2 bytes value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u16(data, init_val);
+
+	return crc32c_2bytes(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u32(data, init_val);
+
+	return crc32c_1word(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+	if (likely(crc32_alg == CRC32_ARM64))
+		return crc32c_arm64_u64(data, init_val);
+
+	return crc32c_2words(data, init_val);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRC_ARM64_H_ */
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
new file mode 100644
index 00000000..7b7d1f85
--- /dev/null
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -0,0 +1,1323 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
+#include <rte_ring.h>
+#include <rte_compat.h>
+
+#include "rte_hash.h"
+#if defined(RTE_ARCH_X86)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
+
+TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_hash_tailq = {
+	.name = "RTE_HASH",
+};
+EAL_REGISTER_TAILQ(rte_hash_tailq)
+
+/* Macro to enable/disable run-time checking of function parameters */
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define RETURN_IF_TRUE(cond, retval) do { \
+	if (cond) \
+		return retval; \
+} while (0)
+#else
+#define RETURN_IF_TRUE(cond, retval)
+#endif
+
+/* Hash function used if none is specified */
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC       rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC       rte_jhash
+#endif
+
+/** Number of items per bucket. */
+#define RTE_HASH_BUCKET_ENTRIES		4
+
+#define NULL_SIGNATURE			0
+
+#define KEY_ALIGNMENT			16
+
+#define LCORE_CACHE_SIZE		8
+
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+	KEY_16_BYTES,
+	KEY_32_BYTES,
+	KEY_48_BYTES,
+	KEY_64_BYTES,
+	KEY_80_BYTES,
+	KEY_96_BYTES,
+	KEY_112_BYTES,
+	KEY_128_BYTES,
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	NULL,
+	rte_hash_k16_cmp_eq,
+	rte_hash_k32_cmp_eq,
+	rte_hash_k48_cmp_eq,
+	rte_hash_k64_cmp_eq,
+	rte_hash_k80_cmp_eq,
+	rte_hash_k96_cmp_eq,
+	rte_hash_k112_cmp_eq,
+	rte_hash_k128_cmp_eq,
+	memcmp
+};
+#else
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	NULL,
+	memcmp
+};
+
+#endif
+
+struct lcore_cache {
+	unsigned len; /**< Cache len */
+	void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
+} __rte_cache_aligned;
+
+/** A hash table structure. */
+struct rte_hash {
+	char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
+	uint32_t entries;               /**< Total table entries. */
+	uint32_t num_buckets;           /**< Number of buckets in table. */
+	uint32_t key_len;               /**< Length of hash key. */
+	rte_hash_function hash_func;    /**< Function used to calculate hash. */
+	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
+	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
+	/**< Custom function used to compare keys. */
+	enum cmp_jump_table_case cmp_jump_table_idx;
+	/**< Indicates which compare function to use. */
+	uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
+						from hash signature. */
+	uint32_t key_entry_size;         /**< Size of each key entry. */
+
+	struct rte_ring *free_slots;    /**< Ring that stores all indexes
+						of the free slots in the key table */
+	void *key_store;                /**< Table storing all keys and data */
+	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
+							hash values and key indexes
+							to the key table*/
+	uint8_t hw_trans_mem_support;	/**< Hardware transactional
+							memory support */
+	struct lcore_cache *local_free_slots;
+	/**< Local cache per lcore, storing some indexes of the free slots */
+} __rte_cache_aligned;
+
+/* Structure storing both primary and secondary hashes */
+struct rte_hash_signatures {
+	union {
+		struct {
+			hash_sig_t current;
+			hash_sig_t alt;
+		};
+		uint64_t sig;
+	};
+};
+
+/* Structure that stores key-value pair */
+struct rte_hash_key {
+	union {
+		uintptr_t idata;
+		void *pdata;
+	};
+	/* Variable key size */
+	char key[0];
+} __attribute__((aligned(KEY_ALIGNMENT)));
+
+/** Bucket structure */
+struct rte_hash_bucket {
+	struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
+	/* Includes dummy key index that always contains index 0 */
+	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
+} __rte_cache_aligned;
+
+struct rte_hash *
+rte_hash_find_existing(const char *name)
+{
+	struct rte_hash *h = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_hash_list *hash_list;
+
+	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, hash_list, next) {
+		h = (struct rte_hash *) te->data;
+		if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+	return h;
+}
+
+void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
+{
+	h->rte_hash_custom_cmp_eq = func;
+}
+
+static inline int
+rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
+{
+	if (h->cmp_jump_table_idx == KEY_CUSTOM)
+		return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
+	else
+		return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
+}
+
+struct rte_hash *
+rte_hash_create(const struct rte_hash_parameters *params)
+{
+	struct rte_hash *h = NULL;
+	struct rte_tailq_entry *te = NULL;
+	struct rte_hash_list *hash_list;
+	struct rte_ring *r = NULL;
+	char hash_name[RTE_HASH_NAMESIZE];
+	void *k = NULL;
+	void *buckets = NULL;
+	char ring_name[RTE_RING_NAMESIZE];
+	unsigned num_key_slots;
+	unsigned hw_trans_mem_support = 0;
+	unsigned i;
+
+	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
+
+	if (params == NULL) {
+		RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
+		return NULL;
+	}
+
+	/* Check for valid parameters */
+	if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
+			(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
+			!rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) ||
+			(params->key_len == 0)) {
+		rte_errno = EINVAL;
+		RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
+		return NULL;
+	}
+
+	/* Check extra flags field to check extra options. */
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
+		hw_trans_mem_support = 1;
+
+	/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
+	if (hw_trans_mem_support)
+		/*
+		 * Increase number of slots by total number of indices
+		 * that can be stored in the lcore caches
+		 * except for the first cache
+		 */
+		num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
+					LCORE_CACHE_SIZE + 1;
+	else
+		num_key_slots = params->entries + 1;
+
+	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
+	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
+			params->socket_id, 0);
+	if (r == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err;
+	}
+
+	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* guarantee there's no existing: this is normally already checked
+	 * by ring creation above */
+	TAILQ_FOREACH(te, hash_list, next) {
+		h = (struct rte_hash *) te->data;
+		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
+			break;
+	}
+	h = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		te = NULL;
+		goto err_unlock;
+	}
+
+	te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
+		goto err_unlock;
+	}
+
+	h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
+					RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (h == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err_unlock;
+	}
+
+	const uint32_t num_buckets = rte_align32pow2(params->entries)
+					/ RTE_HASH_BUCKET_ENTRIES;
+
+	buckets = rte_zmalloc_socket(NULL,
+				num_buckets * sizeof(struct rte_hash_bucket),
+				RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (buckets == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err_unlock;
+	}
+
+	const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
+	const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
+
+	k = rte_zmalloc_socket(NULL, key_tbl_size,
+			RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (k == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err_unlock;
+	}
+
+/*
+ * If x86 architecture is used, select appropriate compare function,
+ * which may use x86 instrinsics, otherwise use memcmp
+ */
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+	/* Select function to compare keys */
+	switch (params->key_len) {
+	case 16:
+		h->cmp_jump_table_idx = KEY_16_BYTES;
+		break;
+	case 32:
+		h->cmp_jump_table_idx = KEY_32_BYTES;
+		break;
+	case 48:
+		h->cmp_jump_table_idx = KEY_48_BYTES;
+		break;
+	case 64:
+		h->cmp_jump_table_idx = KEY_64_BYTES;
+		break;
+	case 80:
+		h->cmp_jump_table_idx = KEY_80_BYTES;
+		break;
+	case 96:
+		h->cmp_jump_table_idx = KEY_96_BYTES;
+		break;
+	case 112:
+		h->cmp_jump_table_idx = KEY_112_BYTES;
+		break;
+	case 128:
+		h->cmp_jump_table_idx = KEY_128_BYTES;
+		break;
+	default:
+		/* If key is not multiple of 16, use generic memcmp */
+		h->cmp_jump_table_idx = KEY_OTHER_BYTES;
+	}
+#else
+	h->cmp_jump_table_idx = KEY_OTHER_BYTES;
+#endif
+
+	if (hw_trans_mem_support) {
+		h->local_free_slots = rte_zmalloc_socket(NULL,
+				sizeof(struct lcore_cache) * RTE_MAX_LCORE,
+				RTE_CACHE_LINE_SIZE, params->socket_id);
+	}
+
+	/* Setup hash context */
+	snprintf(h->name, sizeof(h->name), "%s", params->name);
+	h->entries = params->entries;
+	h->key_len = params->key_len;
+	h->key_entry_size = key_entry_size;
+	h->hash_func_init_val = params->hash_func_init_val;
+
+	h->num_buckets = num_buckets;
+	h->bucket_bitmask = h->num_buckets - 1;
+	h->buckets = buckets;
+	h->hash_func = (params->hash_func == NULL) ?
+		DEFAULT_HASH_FUNC : params->hash_func;
+	h->key_store = k;
+	h->free_slots = r;
+	h->hw_trans_mem_support = hw_trans_mem_support;
+
+	/* populate the free slots ring. Entry zero is reserved for key misses */
+	for (i = 1; i < params->entries + 1; i++)
+		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
+
+	te->data = (void *) h;
+	TAILQ_INSERT_TAIL(hash_list, te, next);
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return h;
+err_unlock:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+err:
+	rte_ring_free(r);
+	rte_free(te);
+	rte_free(h);
+	rte_free(buckets);
+	rte_free(k);
+	return NULL;
+}
+
+void
+rte_hash_free(struct rte_hash *h)
+{
+	struct rte_tailq_entry *te;
+	struct rte_hash_list *hash_list;
+
+	if (h == NULL)
+		return;
+
+	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find out tailq entry */
+	TAILQ_FOREACH(te, hash_list, next) {
+		if (te->data == (void *) h)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+		return;
+	}
+
+	TAILQ_REMOVE(hash_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (h->hw_trans_mem_support)
+		rte_free(h->local_free_slots);
+
+	rte_ring_free(h->free_slots);
+	rte_free(h->key_store);
+	rte_free(h->buckets);
+	rte_free(h);
+	rte_free(te);
+}
+
+hash_sig_t
+rte_hash_hash(const struct rte_hash *h, const void *key)
+{
+	/* calc hash result by key */
+	return h->hash_func(key, h->key_len, h->hash_func_init_val);
+}
+
+/* Calc the secondary hash value from the primary hash value of a given key */
+static inline hash_sig_t
+rte_hash_secondary_hash(const hash_sig_t primary_hash)
+{
+	static const unsigned all_bits_shift = 12;
+	static const unsigned alt_bits_xor = 0x5bd1e995;
+
+	uint32_t tag = primary_hash >> all_bits_shift;
+
+	return primary_hash ^ ((tag + 1) * alt_bits_xor);
+}
+
+void
+rte_hash_reset(struct rte_hash *h)
+{
+	void *ptr;
+	unsigned i;
+
+	if (h == NULL)
+		return;
+
+	memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
+	memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
+
+	/* clear the free ring */
+	while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
+		rte_pause();
+
+	/* Repopulate the free slots ring. Entry zero is reserved for key misses */
+	for (i = 1; i < h->entries + 1; i++)
+		rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
+
+	if (h->hw_trans_mem_support) {
+		/* Reset local caches per lcore */
+		for (i = 0; i < RTE_MAX_LCORE; i++)
+			h->local_free_slots[i].len = 0;
+	}
+}
+
+/* Search for an entry that can be pushed to its alternative location */
+static inline int
+make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
+{
+	unsigned i, j;
+	int ret;
+	uint32_t next_bucket_idx;
+	struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES];
+
+	/*
+	 * Push existing item (search for bucket with space in
+	 * alternative locations) to its alternative location
+	 */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		/* Search for space in alternative locations */
+		next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask;
+		next_bkt[i] = &h->buckets[next_bucket_idx];
+		for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
+			if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE)
+				break;
+		}
+
+		if (j != RTE_HASH_BUCKET_ENTRIES)
+			break;
+	}
+
+	/* Alternative location has spare room (end of recursive function) */
+	if (i != RTE_HASH_BUCKET_ENTRIES) {
+		next_bkt[i]->signatures[j].alt = bkt->signatures[i].current;
+		next_bkt[i]->signatures[j].current = bkt->signatures[i].alt;
+		next_bkt[i]->key_idx[j] = bkt->key_idx[i];
+		return i;
+	}
+
+	/* Pick entry that has not been pushed yet */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++)
+		if (bkt->flag[i] == 0)
+			break;
+
+	/* All entries have been pushed, so entry cannot be added */
+	if (i == RTE_HASH_BUCKET_ENTRIES)
+		return -ENOSPC;
+
+	/* Set flag to indicate that this entry is going to be pushed */
+	bkt->flag[i] = 1;
+	/* Need room in alternative bucket to insert the pushed entry */
+	ret = make_space_bucket(h, next_bkt[i]);
+	/*
+	 * After recursive function.
+	 * Clear flags and insert the pushed entry
+	 * in its alternative location if successful,
+	 * or return error
+	 */
+	bkt->flag[i] = 0;
+	if (ret >= 0) {
+		next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current;
+		next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt;
+		next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
+		return i;
+	} else
+		return ret;
+
+}
+
+/*
+ * Function called to enqueue back an index in the cache/ring,
+ * as slot has not being used and it can be used in the
+ * next addition attempt.
+ */
+static inline void
+enqueue_slot_back(const struct rte_hash *h,
+		struct lcore_cache *cached_free_slots,
+		void *slot_id)
+{
+	if (h->hw_trans_mem_support) {
+		cached_free_slots->objs[cached_free_slots->len] = slot_id;
+		cached_free_slots->len++;
+	} else
+		rte_ring_sp_enqueue(h->free_slots, slot_id);
+}
+
+static inline int32_t
+__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
+						hash_sig_t sig, void *data)
+{
+	hash_sig_t alt_hash;
+	uint32_t prim_bucket_idx, sec_bucket_idx;
+	unsigned i;
+	struct rte_hash_bucket *prim_bkt, *sec_bkt;
+	struct rte_hash_key *new_k, *k, *keys = h->key_store;
+	void *slot_id = NULL;
+	uint32_t new_idx;
+	int ret;
+	unsigned n_slots;
+	unsigned lcore_id;
+	struct lcore_cache *cached_free_slots = NULL;
+
+	prim_bucket_idx = sig & h->bucket_bitmask;
+	prim_bkt = &h->buckets[prim_bucket_idx];
+	rte_prefetch0(prim_bkt);
+
+	alt_hash = rte_hash_secondary_hash(sig);
+	sec_bucket_idx = alt_hash & h->bucket_bitmask;
+	sec_bkt = &h->buckets[sec_bucket_idx];
+	rte_prefetch0(sec_bkt);
+
+	/* Get a new slot for storing the new key */
+	if (h->hw_trans_mem_support) {
+		lcore_id = rte_lcore_id();
+		cached_free_slots = &h->local_free_slots[lcore_id];
+		/* Try to get a free slot from the local cache */
+		if (cached_free_slots->len == 0) {
+			/* Need to get another burst of free slots from global ring */
+			n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
+					cached_free_slots->objs, LCORE_CACHE_SIZE);
+			if (n_slots == 0)
+				return -ENOSPC;
+
+			cached_free_slots->len += n_slots;
+		}
+
+		/* Get a free slot from the local cache */
+		cached_free_slots->len--;
+		slot_id = cached_free_slots->objs[cached_free_slots->len];
+	} else {
+		if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0)
+			return -ENOSPC;
+	}
+
+	new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
+	rte_prefetch0(new_k);
+	new_idx = (uint32_t)((uintptr_t) slot_id);
+
+	/* Check if key is already inserted in primary location */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (prim_bkt->signatures[i].current == sig &&
+				prim_bkt->signatures[i].alt == alt_hash) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					prim_bkt->key_idx[i] * h->key_entry_size);
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				/* Enqueue index of free slot back in the ring. */
+				enqueue_slot_back(h, cached_free_slots, slot_id);
+				/* Update data */
+				k->pdata = data;
+				/*
+				 * Return index where key is stored,
+				 * substracting the first dummy index
+				 */
+				return prim_bkt->key_idx[i] - 1;
+			}
+		}
+	}
+
+	/* Check if key is already inserted in secondary location */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (sec_bkt->signatures[i].alt == sig &&
+				sec_bkt->signatures[i].current == alt_hash) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					sec_bkt->key_idx[i] * h->key_entry_size);
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				/* Enqueue index of free slot back in the ring. */
+				enqueue_slot_back(h, cached_free_slots, slot_id);
+				/* Update data */
+				k->pdata = data;
+				/*
+				 * Return index where key is stored,
+				 * substracting the first dummy index
+				 */
+				return sec_bkt->key_idx[i] - 1;
+			}
+		}
+	}
+
+	/* Copy key */
+	rte_memcpy(new_k->key, key, h->key_len);
+	new_k->pdata = data;
+
+	/* Insert new entry is there is room in the primary bucket */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		/* Check if slot is available */
+		if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
+			prim_bkt->signatures[i].current = sig;
+			prim_bkt->signatures[i].alt = alt_hash;
+			prim_bkt->key_idx[i] = new_idx;
+			return new_idx - 1;
+		}
+	}
+
+	/* Primary bucket is full, so we need to make space for new entry */
+	ret = make_space_bucket(h, prim_bkt);
+	/*
+	 * After recursive function.
+	 * Insert the new entry in the position of the pushed entry
+	 * if successful or return error and
+	 * store the new slot back in the ring
+	 */
+	if (ret >= 0) {
+		prim_bkt->signatures[ret].current = sig;
+		prim_bkt->signatures[ret].alt = alt_hash;
+		prim_bkt->key_idx[ret] = new_idx;
+		return new_idx - 1;
+	}
+
+	/* Error in addition, store new slot back in the ring and return error */
+	enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
+
+	return ret;
+}
+
+int32_t
+rte_hash_add_key_with_hash(const struct rte_hash *h,
+			const void *key, hash_sig_t sig)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_add_key_with_hash(h, key, sig, 0);
+}
+
+int32_t
+rte_hash_add_key(const struct rte_hash *h, const void *key)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
+}
+
+int
+rte_hash_add_key_with_hash_data(const struct rte_hash *h,
+			const void *key, hash_sig_t sig, void *data)
+{
+	int ret;
+
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	ret = __rte_hash_add_key_with_hash(h, key, sig, data);
+	if (ret >= 0)
+		return 0;
+	else
+		return ret;
+}
+
+int
+rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
+{
+	int ret;
+
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+
+	ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
+	if (ret >= 0)
+		return 0;
+	else
+		return ret;
+}
+static inline int32_t
+__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
+					hash_sig_t sig, void **data)
+{
+	uint32_t bucket_idx;
+	hash_sig_t alt_hash;
+	unsigned i;
+	struct rte_hash_bucket *bkt;
+	struct rte_hash_key *k, *keys = h->key_store;
+
+	bucket_idx = sig & h->bucket_bitmask;
+	bkt = &h->buckets[bucket_idx];
+
+	/* Check if key is in primary location */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (bkt->signatures[i].current == sig &&
+				bkt->signatures[i].sig != NULL_SIGNATURE) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					bkt->key_idx[i] * h->key_entry_size);
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				if (data != NULL)
+					*data = k->pdata;
+				/*
+				 * Return index where key is stored,
+				 * substracting the first dummy index
+				 */
+				return bkt->key_idx[i] - 1;
+			}
+		}
+	}
+
+	/* Calculate secondary hash */
+	alt_hash = rte_hash_secondary_hash(sig);
+	bucket_idx = alt_hash & h->bucket_bitmask;
+	bkt = &h->buckets[bucket_idx];
+
+	/* Check if key is in secondary location */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (bkt->signatures[i].current == alt_hash &&
+				bkt->signatures[i].alt == sig) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					bkt->key_idx[i] * h->key_entry_size);
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				if (data != NULL)
+					*data = k->pdata;
+				/*
+				 * Return index where key is stored,
+				 * substracting the first dummy index
+				 */
+				return bkt->key_idx[i] - 1;
+			}
+		}
+	}
+
+	return -ENOENT;
+}
+
+int32_t
+rte_hash_lookup_with_hash(const struct rte_hash *h,
+			const void *key, hash_sig_t sig)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_lookup_with_hash(h, key, sig, NULL);
+}
+
+int32_t
+rte_hash_lookup(const struct rte_hash *h, const void *key)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
+}
+
+int
+rte_hash_lookup_with_hash_data(const struct rte_hash *h,
+			const void *key, hash_sig_t sig, void **data)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_lookup_with_hash(h, key, sig, data);
+}
+
+int
+rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
+}
+
+static inline void
+remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
+{
+	unsigned lcore_id, n_slots;
+	struct lcore_cache *cached_free_slots;
+
+	bkt->signatures[i].sig = NULL_SIGNATURE;
+	if (h->hw_trans_mem_support) {
+		lcore_id = rte_lcore_id();
+		cached_free_slots = &h->local_free_slots[lcore_id];
+		/* Cache full, need to free it. */
+		if (cached_free_slots->len == LCORE_CACHE_SIZE) {
+			/* Need to enqueue the free slots in global ring. */
+			n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
+						cached_free_slots->objs,
+						LCORE_CACHE_SIZE);
+			cached_free_slots->len -= n_slots;
+		}
+		/* Put index of new free slot in cache. */
+		cached_free_slots->objs[cached_free_slots->len] =
+				(void *)((uintptr_t)bkt->key_idx[i]);
+		cached_free_slots->len++;
+	} else {
+		rte_ring_sp_enqueue(h->free_slots,
+				(void *)((uintptr_t)bkt->key_idx[i]));
+	}
+}
+
+static inline int32_t
+__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
+						hash_sig_t sig)
+{
+	uint32_t bucket_idx;
+	hash_sig_t alt_hash;
+	unsigned i;
+	struct rte_hash_bucket *bkt;
+	struct rte_hash_key *k, *keys = h->key_store;
+
+	bucket_idx = sig & h->bucket_bitmask;
+	bkt = &h->buckets[bucket_idx];
+
+	/* Check if key is in primary location */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (bkt->signatures[i].current == sig &&
+				bkt->signatures[i].sig != NULL_SIGNATURE) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					bkt->key_idx[i] * h->key_entry_size);
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				remove_entry(h, bkt, i);
+
+				/*
+				 * Return index where key is stored,
+				 * substracting the first dummy index
+				 */
+				return bkt->key_idx[i] - 1;
+			}
+		}
+	}
+
+	/* Calculate secondary hash */
+	alt_hash = rte_hash_secondary_hash(sig);
+	bucket_idx = alt_hash & h->bucket_bitmask;
+	bkt = &h->buckets[bucket_idx];
+
+	/* Check if key is in secondary location */
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (bkt->signatures[i].current == alt_hash &&
+				bkt->signatures[i].sig != NULL_SIGNATURE) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					bkt->key_idx[i] * h->key_entry_size);
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				remove_entry(h, bkt, i);
+
+				/*
+				 * Return index where key is stored,
+				 * substracting the first dummy index
+				 */
+				return bkt->key_idx[i] - 1;
+			}
+		}
+	}
+
+	return -ENOENT;
+}
+
+int32_t
+rte_hash_del_key_with_hash(const struct rte_hash *h,
+			const void *key, hash_sig_t sig)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_del_key_with_hash(h, key, sig);
+}
+
+int32_t
+rte_hash_del_key(const struct rte_hash *h, const void *key)
+{
+	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
+	return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
+}
+
+/* Lookup bulk stage 0: Prefetch input key */
+static inline void
+lookup_stage0(unsigned *idx, uint64_t *lookup_mask,
+		const void * const *keys)
+{
+	*idx = __builtin_ctzl(*lookup_mask);
+	if (*lookup_mask == 0)
+		*idx = 0;
+
+	rte_prefetch0(keys[*idx]);
+	*lookup_mask &= ~(1llu << *idx);
+}
+
+/*
+ * Lookup bulk stage 1: Calculate primary/secondary hashes
+ * and prefetch primary/secondary buckets
+ */
+static inline void
+lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash,
+		const struct rte_hash_bucket **primary_bkt,
+		const struct rte_hash_bucket **secondary_bkt,
+		hash_sig_t *hash_vals, const void * const *keys,
+		const struct rte_hash *h)
+{
+	*prim_hash = rte_hash_hash(h, keys[idx]);
+	hash_vals[idx] = *prim_hash;
+	*sec_hash = rte_hash_secondary_hash(*prim_hash);
+
+	*primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask];
+	*secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask];
+
+	rte_prefetch0(*primary_bkt);
+	rte_prefetch0(*secondary_bkt);
+}
+
+/*
+ * Lookup bulk stage 2:  Search for match hashes in primary/secondary locations
+ * and prefetch first key slot
+ */
+static inline void
+lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash,
+		const struct rte_hash_bucket *prim_bkt,
+		const struct rte_hash_bucket *sec_bkt,
+		const struct rte_hash_key **key_slot, int32_t *positions,
+		uint64_t *extra_hits_mask, const void *keys,
+		const struct rte_hash *h)
+{
+	unsigned prim_hash_matches, sec_hash_matches, key_idx, i;
+	unsigned total_hash_matches;
+
+	prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
+	sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i);
+		sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i);
+	}
+
+	key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)];
+	if (key_idx == 0)
+		key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)];
+
+	total_hash_matches = (prim_hash_matches |
+				(sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1)));
+	*key_slot = (const struct rte_hash_key *) ((const char *)keys +
+					key_idx * h->key_entry_size);
+
+	rte_prefetch0(*key_slot);
+	/*
+	 * Return index where key is stored,
+	 * substracting the first dummy index
+	 */
+	positions[idx] = (key_idx - 1);
+
+	*extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx;
+
+}
+
+
+/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */
+static inline void
+lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys,
+		const int32_t *positions, void *data[], uint64_t *hits,
+		const struct rte_hash *h)
+{
+	unsigned hit;
+	unsigned key_idx;
+
+	hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h);
+	if (data != NULL)
+		data[idx] = key_slot->pdata;
+
+	key_idx = positions[idx] + 1;
+	/*
+	 * If key index is 0, force hit to be 0, in case key to be looked up
+	 * is all zero (as in the dummy slot), which would result in a wrong hit
+	 */
+	*hits |= (uint64_t)(hit && !!key_idx)  << idx;
+}
+
+static inline void
+__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+			uint32_t num_keys, int32_t *positions,
+			uint64_t *hit_mask, void *data[])
+{
+	uint64_t hits = 0;
+	uint64_t extra_hits_mask = 0;
+	uint64_t lookup_mask, miss_mask;
+	unsigned idx;
+	const void *key_store = h->key_store;
+	int ret;
+	hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX];
+
+	unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
+	const struct rte_hash_bucket *primary_bkt10, *primary_bkt11;
+	const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11;
+	const struct rte_hash_bucket *primary_bkt20, *primary_bkt21;
+	const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21;
+	const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
+	hash_sig_t primary_hash10, primary_hash11;
+	hash_sig_t secondary_hash10, secondary_hash11;
+	hash_sig_t primary_hash20, primary_hash21;
+	hash_sig_t secondary_hash20, secondary_hash21;
+
+	lookup_mask = (uint64_t) -1 >> (64 - num_keys);
+	miss_mask = lookup_mask;
+
+	lookup_stage0(&idx00, &lookup_mask, keys);
+	lookup_stage0(&idx01, &lookup_mask, keys);
+
+	idx10 = idx00, idx11 = idx01;
+
+	lookup_stage0(&idx00, &lookup_mask, keys);
+	lookup_stage0(&idx01, &lookup_mask, keys);
+	lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
+			&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
+	lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
+			&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
+
+	primary_bkt20 = primary_bkt10;
+	primary_bkt21 = primary_bkt11;
+	secondary_bkt20 = secondary_bkt10;
+	secondary_bkt21 = secondary_bkt11;
+	primary_hash20 = primary_hash10;
+	primary_hash21 = primary_hash11;
+	secondary_hash20 = secondary_hash10;
+	secondary_hash21 = secondary_hash11;
+	idx20 = idx10, idx21 = idx11;
+	idx10 = idx00, idx11 = idx01;
+
+	lookup_stage0(&idx00, &lookup_mask, keys);
+	lookup_stage0(&idx01, &lookup_mask, keys);
+	lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
+			&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
+	lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
+			&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
+	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
+			secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
+			key_store, h);
+	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
+			secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
+			key_store, h);
+
+	while (lookup_mask) {
+		k_slot30 = k_slot20, k_slot31 = k_slot21;
+		idx30 = idx20, idx31 = idx21;
+		primary_bkt20 = primary_bkt10;
+		primary_bkt21 = primary_bkt11;
+		secondary_bkt20 = secondary_bkt10;
+		secondary_bkt21 = secondary_bkt11;
+		primary_hash20 = primary_hash10;
+		primary_hash21 = primary_hash11;
+		secondary_hash20 = secondary_hash10;
+		secondary_hash21 = secondary_hash11;
+		idx20 = idx10, idx21 = idx11;
+		idx10 = idx00, idx11 = idx01;
+
+		lookup_stage0(&idx00, &lookup_mask, keys);
+		lookup_stage0(&idx01, &lookup_mask, keys);
+		lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
+			&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
+		lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
+			&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
+		lookup_stage2(idx20, primary_hash20, secondary_hash20,
+			primary_bkt20, secondary_bkt20, &k_slot20, positions,
+			&extra_hits_mask, key_store, h);
+		lookup_stage2(idx21, primary_hash21, secondary_hash21,
+			primary_bkt21, secondary_bkt21,	&k_slot21, positions,
+			&extra_hits_mask, key_store, h);
+		lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+		lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
+	}
+
+	k_slot30 = k_slot20, k_slot31 = k_slot21;
+	idx30 = idx20, idx31 = idx21;
+	primary_bkt20 = primary_bkt10;
+	primary_bkt21 = primary_bkt11;
+	secondary_bkt20 = secondary_bkt10;
+	secondary_bkt21 = secondary_bkt11;
+	primary_hash20 = primary_hash10;
+	primary_hash21 = primary_hash11;
+	secondary_hash20 = secondary_hash10;
+	secondary_hash21 = secondary_hash11;
+	idx20 = idx10, idx21 = idx11;
+	idx10 = idx00, idx11 = idx01;
+
+	lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
+		&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
+	lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
+		&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
+	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
+		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
+		key_store, h);
+	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
+		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
+		key_store, h);
+	lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+	lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
+
+	k_slot30 = k_slot20, k_slot31 = k_slot21;
+	idx30 = idx20, idx31 = idx21;
+	primary_bkt20 = primary_bkt10;
+	primary_bkt21 = primary_bkt11;
+	secondary_bkt20 = secondary_bkt10;
+	secondary_bkt21 = secondary_bkt11;
+	primary_hash20 = primary_hash10;
+	primary_hash21 = primary_hash11;
+	secondary_hash20 = secondary_hash10;
+	secondary_hash21 = secondary_hash11;
+	idx20 = idx10, idx21 = idx11;
+
+	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
+		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
+		key_store, h);
+	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
+		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
+		key_store, h);
+	lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+	lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
+
+	k_slot30 = k_slot20, k_slot31 = k_slot21;
+	idx30 = idx20, idx31 = idx21;
+
+	lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
+	lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
+
+	/* ignore any items we have already found */
+	extra_hits_mask &= ~hits;
+
+	if (unlikely(extra_hits_mask)) {
+		/* run a single search for each remaining item */
+		do {
+			idx = __builtin_ctzl(extra_hits_mask);
+			if (data != NULL) {
+				ret = rte_hash_lookup_with_hash_data(h,
+						keys[idx], hash_vals[idx], &data[idx]);
+				if (ret >= 0)
+					hits |= 1ULL << idx;
+			} else {
+				positions[idx] = rte_hash_lookup_with_hash(h,
+							keys[idx], hash_vals[idx]);
+				if (positions[idx] >= 0)
+					hits |= 1llu << idx;
+			}
+			extra_hits_mask &= ~(1llu << idx);
+		} while (extra_hits_mask);
+	}
+
+	miss_mask &= ~hits;
+	if (unlikely(miss_mask)) {
+		do {
+			idx = __builtin_ctzl(miss_mask);
+			positions[idx] = -ENOENT;
+			miss_mask &= ~(1llu << idx);
+		} while (miss_mask);
+	}
+
+	if (hit_mask != NULL)
+		*hit_mask = hits;
+}
+
+int
+rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+		      uint32_t num_keys, int32_t *positions)
+{
+	RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
+			(num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
+			(positions == NULL)), -EINVAL);
+
+	__rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
+	return 0;
+}
+
+int
+rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
+		      uint32_t num_keys, uint64_t *hit_mask, void *data[])
+{
+	RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
+			(num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
+			(hit_mask == NULL)), -EINVAL);
+
+	int32_t positions[num_keys];
+
+	__rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
+
+	/* Return number of hits */
+	return __builtin_popcountl(*hit_mask);
+}
+
+int32_t
+rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
+{
+	uint32_t bucket_idx, idx, position;
+	struct rte_hash_key *next_key;
+
+	RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
+
+	const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
+	/* Out of bounds */
+	if (*next >= total_entries)
+		return -ENOENT;
+
+	/* Calculate bucket and index of current iterator */
+	bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
+	idx = *next % RTE_HASH_BUCKET_ENTRIES;
+
+	/* If current position is empty, go to the next one */
+	while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) {
+		(*next)++;
+		/* End of table */
+		if (*next == total_entries)
+			return -ENOENT;
+		bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
+		idx = *next % RTE_HASH_BUCKET_ENTRIES;
+	}
+
+	/* Get position of entry in key table */
+	position = h->buckets[bucket_idx].key_idx[idx];
+	next_key = (struct rte_hash_key *) ((char *)h->key_store +
+				position * h->key_entry_size);
+	/* Return key and data */
+	*key = next_key->key;
+	*data = next_key->pdata;
+
+	/* Increment iterator */
+	(*next)++;
+
+	return position - 1;
+}
diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c
new file mode 100644
index 00000000..55c9f358
--- /dev/null
+++ b/lib/librte_hash/rte_fbk_hash.c
@@ -0,0 +1,231 @@
+/**
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sys/queue.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
+
+#include "rte_fbk_hash.h"
+
+TAILQ_HEAD(rte_fbk_hash_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_fbk_hash_tailq = {
+	.name = "RTE_FBK_HASH",
+};
+EAL_REGISTER_TAILQ(rte_fbk_hash_tailq)
+
+/**
+ * Performs a lookup for an existing hash table, and returns a pointer to
+ * the table if found.
+ *
+ * @param name
+ *   Name of the hash table to find
+ *
+ * @return
+ *   pointer to hash table structure or NULL on error.
+ */
+struct rte_fbk_hash_table *
+rte_fbk_hash_find_existing(const char *name)
+{
+	struct rte_fbk_hash_table *h = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_fbk_hash_list *fbk_hash_list;
+
+	fbk_hash_list = RTE_TAILQ_CAST(rte_fbk_hash_tailq.head,
+				       rte_fbk_hash_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, fbk_hash_list, next) {
+		h = (struct rte_fbk_hash_table *) te->data;
+		if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+	return h;
+}
+
+/**
+ * Create a new hash table for use with four byte keys.
+ *
+ * @param params
+ *   Parameters used in creation of hash table.
+ *
+ * @return
+ *   Pointer to hash table structure that is used in future hash table
+ *   operations, or NULL on error.
+ */
+struct rte_fbk_hash_table *
+rte_fbk_hash_create(const struct rte_fbk_hash_params *params)
+{
+	struct rte_fbk_hash_table *ht = NULL;
+	struct rte_tailq_entry *te;
+	char hash_name[RTE_FBK_HASH_NAMESIZE];
+	const uint32_t mem_size =
+			sizeof(*ht) + (sizeof(ht->t[0]) * params->entries);
+	uint32_t i;
+	struct rte_fbk_hash_list *fbk_hash_list;
+
+	fbk_hash_list = RTE_TAILQ_CAST(rte_fbk_hash_tailq.head,
+				       rte_fbk_hash_list);
+
+	/* Error checking of parameters. */
+	if ((!rte_is_power_of_2(params->entries)) ||
+			(!rte_is_power_of_2(params->entries_per_bucket)) ||
+			(params->entries == 0) ||
+			(params->entries_per_bucket == 0) ||
+			(params->entries_per_bucket > params->entries) ||
+			(params->entries > RTE_FBK_HASH_ENTRIES_MAX) ||
+			(params->entries_per_bucket > RTE_FBK_HASH_ENTRIES_PER_BUCKET_MAX)){
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(hash_name, sizeof(hash_name), "FBK_%s", params->name);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, fbk_hash_list, next) {
+		ht = (struct rte_fbk_hash_table *) te->data;
+		if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0)
+			break;
+	}
+	ht = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	te = rte_zmalloc("FBK_HASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n");
+		goto exit;
+	}
+
+	/* Allocate memory for table. */
+	ht = (struct rte_fbk_hash_table *)rte_zmalloc_socket(hash_name, mem_size,
+			0, params->socket_id);
+	if (ht == NULL) {
+		RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n");
+		rte_free(te);
+		goto exit;
+	}
+
+	/* Set up hash table context. */
+	snprintf(ht->name, sizeof(ht->name), "%s", params->name);
+	ht->entries = params->entries;
+	ht->entries_per_bucket = params->entries_per_bucket;
+	ht->used_entries = 0;
+	ht->bucket_mask = (params->entries / params->entries_per_bucket) - 1;
+	for (ht->bucket_shift = 0, i = 1;
+	    (params->entries_per_bucket & i) == 0;
+	    ht->bucket_shift++, i <<= 1)
+		; /* empty loop body */
+
+	if (params->hash_func != NULL) {
+		ht->hash_func = params->hash_func;
+		ht->init_val = params->init_val;
+	}
+	else {
+		ht->hash_func = RTE_FBK_HASH_FUNC_DEFAULT;
+		ht->init_val = RTE_FBK_HASH_INIT_VAL_DEFAULT;
+	}
+
+	te->data = (void *) ht;
+
+	TAILQ_INSERT_TAIL(fbk_hash_list, te, next);
+
+exit:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return ht;
+}
+
+/**
+ * Free all memory used by a hash table.
+ *
+ * @param ht
+ *   Hash table to deallocate.
+ */
+void
+rte_fbk_hash_free(struct rte_fbk_hash_table *ht)
+{
+	struct rte_tailq_entry *te;
+	struct rte_fbk_hash_list *fbk_hash_list;
+
+	if (ht == NULL)
+		return;
+
+	fbk_hash_list = RTE_TAILQ_CAST(rte_fbk_hash_tailq.head,
+				       rte_fbk_hash_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find out tailq entry */
+	TAILQ_FOREACH(te, fbk_hash_list, next) {
+		if (te->data == (void *) ht)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+		return;
+	}
+
+	TAILQ_REMOVE(fbk_hash_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_free(ht);
+	rte_free(te);
+}
diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h
new file mode 100644
index 00000000..a430961d
--- /dev/null
+++ b/lib/librte_hash/rte_fbk_hash.h
@@ -0,0 +1,396 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_FBK_HASH_H_
+#define _RTE_FBK_HASH_H_
+
+/**
+ * @file
+ *
+ * This is a hash table implementation for four byte keys (fbk).
+ *
+ * Note that the return value of the add function should always be checked as,
+ * if a bucket is full, the key is not added even if there is space in other
+ * buckets. This keeps the lookup function very simple and therefore fast.
+ */
+
+#include <stdint.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#ifndef RTE_FBK_HASH_FUNC_DEFAULT
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+/** Default four-byte key hash function if none is specified. */
+#define RTE_FBK_HASH_FUNC_DEFAULT		rte_hash_crc_4byte
+#else
+#include <rte_jhash.h>
+#define RTE_FBK_HASH_FUNC_DEFAULT		rte_jhash_1word
+#endif
+#endif
+
+#ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT
+/** Initialising value used when calculating hash. */
+#define RTE_FBK_HASH_INIT_VAL_DEFAULT		0xFFFFFFFF
+#endif
+
+/** The maximum number of entries in the hash table that is supported. */
+#define RTE_FBK_HASH_ENTRIES_MAX		(1 << 20)
+
+/** The maximum number of entries in each bucket that is supported. */
+#define RTE_FBK_HASH_ENTRIES_PER_BUCKET_MAX	256
+
+/** Maximum size of string for naming the hash. */
+#define RTE_FBK_HASH_NAMESIZE			32
+
+/** Type of function that can be used for calculating the hash value. */
+typedef uint32_t (*rte_fbk_hash_fn)(uint32_t key, uint32_t init_val);
+
+/** Parameters used when creating four-byte key hash table. */
+struct rte_fbk_hash_params {
+	const char *name;		/**< Name of the hash table. */
+	uint32_t entries;		/**< Total number of entries. */
+	uint32_t entries_per_bucket;	/**< Number of entries in a bucket. */
+	int socket_id;			/**< Socket to allocate memory on. */
+	rte_fbk_hash_fn hash_func;	/**< The hash function. */
+	uint32_t init_val;		/**< For initialising hash function. */
+};
+
+/** Individual entry in the four-byte key hash table. */
+union rte_fbk_hash_entry {
+	uint64_t whole_entry;		/**< For accessing entire entry. */
+	struct {
+		uint16_t is_entry;	/**< Non-zero if entry is active. */
+		uint16_t value;		/**< Value returned by lookup. */
+		uint32_t key;		/**< Key used to find value. */
+	} entry;			/**< For accessing each entry part. */
+};
+
+
+/** The four-byte key hash table structure. */
+struct rte_fbk_hash_table {
+	char name[RTE_FBK_HASH_NAMESIZE];	/**< Name of the hash. */
+	uint32_t entries;		/**< Total number of entries. */
+	uint32_t entries_per_bucket;	/**< Number of entries in a bucket. */
+	uint32_t used_entries;		/**< How many entries are used. */
+	uint32_t bucket_mask;		/**< To find which bucket the key is in. */
+	uint32_t bucket_shift;		/**< Convert bucket to table offset. */
+	rte_fbk_hash_fn hash_func;	/**< The hash function. */
+	uint32_t init_val;		/**< For initialising hash function. */
+
+	/** A flat table of all buckets. */
+	union rte_fbk_hash_entry t[0];
+};
+
+/**
+ * Find the offset into hash table of the bucket containing a particular key.
+ *
+ * @param ht
+ *   Pointer to hash table.
+ * @param key
+ *   Key to calculate bucket for.
+ * @return
+ *   Offset into hash table.
+ */
+static inline uint32_t
+rte_fbk_hash_get_bucket(const struct rte_fbk_hash_table *ht, uint32_t key)
+{
+	return (ht->hash_func(key, ht->init_val) & ht->bucket_mask) <<
+			ht->bucket_shift;
+}
+
+/**
+ * Add a key to an existing hash table with bucket id.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param value
+ *   Value to associate with key.
+ * @param bucket
+ *   Bucket to associate with key.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+static inline int
+rte_fbk_hash_add_key_with_bucket(struct rte_fbk_hash_table *ht,
+			uint32_t key, uint16_t value, uint32_t bucket)
+{
+	/*
+	 * The writing of a new value to the hash table is done as a single
+	 * 64bit operation. This should help prevent individual entries being
+	 * corrupted due to race conditions, but it's still possible to
+	 * overwrite entries that have just been made valid.
+	 */
+	const uint64_t new_entry = ((uint64_t)(key) << 32) |
+			((uint64_t)(value) << 16) |
+			1;  /* 1 = is_entry bit. */
+	uint32_t i;
+
+	for (i = 0; i < ht->entries_per_bucket; i++) {
+		/* Set entry if unused. */
+		if (! ht->t[bucket + i].entry.is_entry) {
+			ht->t[bucket + i].whole_entry = new_entry;
+			ht->used_entries++;
+			return 0;
+		}
+		/* Change value if key already exists. */
+		if (ht->t[bucket + i].entry.key == key) {
+			ht->t[bucket + i].entry.value = value;
+			return 0;
+		}
+	}
+
+	return -ENOSPC; /* No space in bucket. */
+}
+
+/**
+ * Add a key to an existing hash table. This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param value
+ *   Value to associate with key.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+static inline int
+rte_fbk_hash_add_key(struct rte_fbk_hash_table *ht,
+			uint32_t key, uint16_t value)
+{
+	return rte_fbk_hash_add_key_with_bucket(ht,
+				key, value, rte_fbk_hash_get_bucket(ht, key));
+}
+
+/**
+ * Remove a key with a given bucket id from an existing hash table.
+ * This operation is not multi-thread
+ * safe and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to remove the key from.
+ * @param key
+ *   Key to remove from the hash table.
+ * @param bucket
+ *   Bucket id associate with key.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+static inline int
+rte_fbk_hash_delete_key_with_bucket(struct rte_fbk_hash_table *ht,
+					uint32_t key, uint32_t bucket)
+{
+	uint32_t last_entry = ht->entries_per_bucket - 1;
+	uint32_t i, j;
+
+	for (i = 0; i < ht->entries_per_bucket; i++) {
+		if (ht->t[bucket + i].entry.key == key) {
+			/* Find last key in bucket. */
+			for (j = ht->entries_per_bucket - 1; j > i; j-- ) {
+				if (! ht->t[bucket + j].entry.is_entry) {
+					last_entry = j - 1;
+				}
+			}
+			/*
+			 * Move the last key to the deleted key's position, and
+			 * delete the last key. lastEntry and i may be same but
+			 * it doesn't matter.
+			 */
+			ht->t[bucket + i].whole_entry =
+					ht->t[bucket + last_entry].whole_entry;
+			ht->t[bucket + last_entry].whole_entry = 0;
+
+			ht->used_entries--;
+			return 0;
+		}
+	}
+
+	return -ENOENT; /* Key didn't exist. */
+}
+
+/**
+ * Remove a key from an existing hash table. This operation is not multi-thread
+ * safe and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to remove the key from.
+ * @param key
+ *   Key to remove from the hash table.
+ * @return
+ *   0 if ok, or negative value on error.
+ */
+static inline int
+rte_fbk_hash_delete_key(struct rte_fbk_hash_table *ht, uint32_t key)
+{
+	return rte_fbk_hash_delete_key_with_bucket(ht,
+				key, rte_fbk_hash_get_bucket(ht, key));
+}
+
+/**
+ * Find a key in the hash table with a given bucketid.
+ * This operation is multi-thread safe.
+ *
+ * @param ht
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @param bucket
+ *   Bucket associate to the key.
+ * @return
+ *   The value that was associated with the key, or negative value on error.
+ */
+static inline int
+rte_fbk_hash_lookup_with_bucket(const struct rte_fbk_hash_table *ht,
+				uint32_t key, uint32_t bucket)
+{
+	union rte_fbk_hash_entry current_entry;
+	uint32_t i;
+
+	for (i = 0; i < ht->entries_per_bucket; i++) {
+		/* Single read of entry, which should be atomic. */
+		current_entry.whole_entry = ht->t[bucket + i].whole_entry;
+		if (! current_entry.entry.is_entry) {
+			return -ENOENT; /* Error once we hit an empty field. */
+		}
+		if (current_entry.entry.key == key) {
+			return current_entry.entry.value;
+		}
+	}
+	return -ENOENT; /* Key didn't exist. */
+}
+
+/**
+ * Find a key in the hash table. This operation is multi-thread safe.
+ *
+ * @param ht
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @return
+ *   The value that was associated with the key, or negative value on error.
+ */
+static inline int
+rte_fbk_hash_lookup(const struct rte_fbk_hash_table *ht, uint32_t key)
+{
+	return rte_fbk_hash_lookup_with_bucket(ht,
+				key, rte_fbk_hash_get_bucket(ht, key));
+}
+
+/**
+ * Delete all entries in a hash table. This operation is not multi-thread
+ * safe and should only be called from one thread.
+ *
+ * @param ht
+ *   Hash table to delete entries in.
+ */
+static inline void
+rte_fbk_hash_clear_all(struct rte_fbk_hash_table *ht)
+{
+	memset(ht->t, 0, sizeof(ht->t[0]) * ht->entries);
+	ht->used_entries = 0;
+}
+
+/**
+ * Find what fraction of entries are being used.
+ *
+ * @param ht
+ *   Hash table to find how many entries are being used in.
+ * @return
+ *   Load factor of the hash table, or negative value on error.
+ */
+static inline double
+rte_fbk_hash_get_load_factor(struct rte_fbk_hash_table *ht)
+{
+	return (double)ht->used_entries / (double)ht->entries;
+}
+
+/**
+ * Performs a lookup for an existing hash table, and returns a pointer to
+ * the table if found.
+ *
+ * @param name
+ *   Name of the hash table to find
+ *
+ * @return
+ *   pointer to hash table structure or NULL on error with rte_errno
+ *   set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ */
+struct rte_fbk_hash_table *rte_fbk_hash_find_existing(const char *name);
+
+/**
+ * Create a new hash table for use with four byte keys.
+ *
+ * @param params
+ *   Parameters used in creation of hash table.
+ *
+ * @return
+ *   Pointer to hash table structure that is used in future hash table
+ *   operations, or NULL on error with rte_errno set appropriately.
+ *   Possible rte_errno error values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - invalid parameter value passed to function
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_fbk_hash_table * \
+rte_fbk_hash_create(const struct rte_fbk_hash_params *params);
+
+/**
+ * Free all memory used by a hash table.
+ * Has no effect on hash tables allocated in memory zones
+ *
+ * @param ht
+ *   Hash table to deallocate.
+ */
+void rte_fbk_hash_free(struct rte_fbk_hash_table *ht);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FBK_HASH_H_ */
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
new file mode 100644
index 00000000..ae00b658
--- /dev/null
+++ b/lib/librte_hash/rte_hash.h
@@ -0,0 +1,436 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_HASH_H_
+#define _RTE_HASH_H_
+
+/**
+ * @file
+ *
+ * RTE Hash Table
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Maximum size of hash table that can be created. */
+#define RTE_HASH_ENTRIES_MAX			(1 << 30)
+
+/** Maximum number of characters in hash name.*/
+#define RTE_HASH_NAMESIZE			32
+
+/** Maximum number of keys that can be searched for using rte_hash_lookup_bulk. */
+#define RTE_HASH_LOOKUP_BULK_MAX		64
+#define RTE_HASH_LOOKUP_MULTI_MAX		RTE_HASH_LOOKUP_BULK_MAX
+
+/** Enable Hardware transactional memory support. */
+#define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01
+
+/** Signature of key that is stored internally. */
+typedef uint32_t hash_sig_t;
+
+/** Type of function that can be used for calculating the hash value. */
+typedef uint32_t (*rte_hash_function)(const void *key, uint32_t key_len,
+				      uint32_t init_val);
+
+/** Type of function used to compare the hash key. */
+typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t key_len);
+
+/**
+ * Parameters used when creating the hash table.
+ */
+struct rte_hash_parameters {
+	const char *name;		/**< Name of the hash. */
+	uint32_t entries;		/**< Total hash table entries. */
+	uint32_t reserved;		/**< Unused field. Should be set to 0 */
+	uint32_t key_len;		/**< Length of hash key. */
+	rte_hash_function hash_func;	/**< Primary Hash function used to calculate hash. */
+	uint32_t hash_func_init_val;	/**< Init value used by hash_func. */
+	int socket_id;			/**< NUMA Socket ID for memory. */
+	uint8_t extra_flag;		/**< Indicate if additional parameters are present. */
+};
+
+/** @internal A hash table structure. */
+struct rte_hash;
+
+/**
+ * Create a new hash table.
+ *
+ * @param params
+ *   Parameters used to create and initialise the hash table.
+ * @return
+ *   Pointer to hash table structure that is used in future hash table
+ *   operations, or NULL on error, with error code set in rte_errno.
+ *   Possible rte_errno errors include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - ENOENT - missing entry
+ *    - EINVAL - invalid parameter passed to function
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_hash *
+rte_hash_create(const struct rte_hash_parameters *params);
+
+/**
+ * Set a new hash compare function other than the default one.
+ *
+ * @note Function pointer does not work with multi-process, so do not use it
+ * in multi-process mode.
+ *
+ * @param h
+ *   Hash table for which the function is to be changed
+ * @param func
+ *   New compare function
+ */
+void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func);
+
+/**
+ * Find an existing hash table object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the hash table as passed to rte_hash_create()
+ * @return
+ *   Pointer to hash table or NULL if object not found
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - value not available for return
+ */
+struct rte_hash *
+rte_hash_find_existing(const char *name);
+
+/**
+ * De-allocate all memory used by hash table.
+ * @param h
+ *   Hash table to free
+ */
+void
+rte_hash_free(struct rte_hash *h);
+
+/**
+ * Reset all hash structure, by zeroing all entries
+ * @param h
+ *   Hash table to reset
+ */
+void
+rte_hash_reset(struct rte_hash *h);
+
+/**
+ * Add a key-value pair to an existing hash table.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param h
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param data
+ *   Data to add to the hash table.
+ * @return
+ *   - 0 if added successfully
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOSPC if there is no space in the hash for this key.
+ */
+int
+rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data);
+
+/**
+ * Add a key-value pair with a pre-computed hash value
+ * to an existing hash table.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param h
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param sig
+ *   Precomputed hash value for 'key'
+ * @param data
+ *   Data to add to the hash table.
+ * @return
+ *   - 0 if added successfully
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOSPC if there is no space in the hash for this key.
+ */
+int32_t
+rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key,
+						hash_sig_t sig, void *data);
+
+/**
+ * Add a key to an existing hash table. This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param h
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOSPC if there is no space in the hash for this key.
+ *   - A positive value that can be used by the caller as an offset into an
+ *     array of user data. This value is unique for this key.
+ */
+int32_t
+rte_hash_add_key(const struct rte_hash *h, const void *key);
+
+/**
+ * Add a key to an existing hash table.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param h
+ *   Hash table to add the key to.
+ * @param key
+ *   Key to add to the hash table.
+ * @param sig
+ *   Precomputed hash value for 'key'.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOSPC if there is no space in the hash for this key.
+ *   - A positive value that can be used by the caller as an offset into an
+ *     array of user data. This value is unique for this key.
+ */
+int32_t
+rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig);
+
+/**
+ * Remove a key from an existing hash table.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param h
+ *   Hash table to remove the key from.
+ * @param key
+ *   Key to remove from the hash table.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if the key is not found.
+ *   - A positive value that can be used by the caller as an offset into an
+ *     array of user data. This value is unique for this key, and is the same
+ *     value that was returned when the key was added.
+ */
+int32_t
+rte_hash_del_key(const struct rte_hash *h, const void *key);
+
+/**
+ * Remove a key from an existing hash table.
+ * This operation is not multi-thread safe
+ * and should only be called from one thread.
+ *
+ * @param h
+ *   Hash table to remove the key from.
+ * @param key
+ *   Key to remove from the hash table.
+ * @param sig
+ *   Precomputed hash value for 'key'.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if the key is not found.
+ *   - A positive value that can be used by the caller as an offset into an
+ *     array of user data. This value is unique for this key, and is the same
+ *     value that was returned when the key was added.
+ */
+int32_t
+rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig);
+
+
+/**
+ * Find a key-value pair in the hash table.
+ * This operation is multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @param data
+ *   Output with pointer to data returned from the hash table.
+ * @return
+ *   0 if successful lookup
+ *   - EINVAL if the parameters are invalid.
+ *   - ENOENT if the key is not found.
+ */
+int
+rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
+
+/**
+ * Find a key-value pair with a pre-computed hash value
+ * to an existing hash table.
+ * This operation is multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @param sig
+ *   Precomputed hash value for 'key'
+ * @param data
+ *   Output with pointer to data returned from the hash table.
+ * @return
+ *   0 if successful lookup
+ *   - EINVAL if the parameters are invalid.
+ *   - ENOENT if the key is not found.
+ */
+int
+rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
+					hash_sig_t sig, void **data);
+
+/**
+ * Find a key in the hash table.
+ * This operation is multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if the key is not found.
+ *   - A positive value that can be used by the caller as an offset into an
+ *     array of user data. This value is unique for this key, and is the same
+ *     value that was returned when the key was added.
+ */
+int32_t
+rte_hash_lookup(const struct rte_hash *h, const void *key);
+
+/**
+ * Find a key in the hash table.
+ * This operation is multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @param sig
+ *   Hash value to remove from the hash table.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if the key is not found.
+ *   - A positive value that can be used by the caller as an offset into an
+ *     array of user data. This value is unique for this key, and is the same
+ *     value that was returned when the key was added.
+ */
+int32_t
+rte_hash_lookup_with_hash(const struct rte_hash *h,
+				const void *key, hash_sig_t sig);
+
+/**
+ * Calc a hash value by key.
+ * This operation is not multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param key
+ *   Key to find.
+ * @return
+ *   - hash value
+ */
+hash_sig_t
+rte_hash_hash(const struct rte_hash *h, const void *key);
+
+#define rte_hash_lookup_multi rte_hash_lookup_bulk
+#define rte_hash_lookup_multi_data rte_hash_lookup_bulk_data
+/**
+ * Find multiple keys in the hash table.
+ * This operation is multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param keys
+ *   A pointer to a list of keys to look for.
+ * @param num_keys
+ *   How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX).
+ * @param hit_mask
+ *   Output containing a bitmask with all successful lookups.
+ * @param data
+ *   Output containing array of data returned from all the successful lookups.
+ * @return
+ *   -EINVAL if there's an error, otherwise number of successful lookups.
+ */
+int
+rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
+		      uint32_t num_keys, uint64_t *hit_mask, void *data[]);
+
+/**
+ * Find multiple keys in the hash table.
+ * This operation is multi-thread safe.
+ *
+ * @param h
+ *   Hash table to look in.
+ * @param keys
+ *   A pointer to a list of keys to look for.
+ * @param num_keys
+ *   How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX).
+ * @param positions
+ *   Output containing a list of values, corresponding to the list of keys that
+ *   can be used by the caller as an offset into an array of user data. These
+ *   values are unique for each key, and are the same values that were returned
+ *   when each key was added. If a key in the list was not found, then -ENOENT
+ *   will be the value.
+ * @return
+ *   -EINVAL if there's an error, otherwise 0.
+ */
+int
+rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+		      uint32_t num_keys, int32_t *positions);
+
+/**
+ * Iterate through the hash table, returning key-value pairs.
+ *
+ * @param h
+ *   Hash table to iterate
+ * @param key
+ *   Output containing the key where current iterator
+ *   was pointing at
+ * @param data
+ *   Output containing the data associated with key.
+ *   Returns NULL if data was not stored.
+ * @param next
+ *   Pointer to iterator. Should be 0 to start iterating the hash table.
+ *   Iterator is incremented after each call of this function.
+ * @return
+ *   Position where key was stored, if successful.
+ *   - -EINVAL if the parameters are invalid.
+ *   - -ENOENT if end of the hash table.
+ */
+int32_t
+rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HASH_H_ */
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
new file mode 100644
index 00000000..63e74aa4
--- /dev/null
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -0,0 +1,639 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_HASH_CRC_H_
+#define _RTE_HASH_CRC_H_
+
+/**
+ * @file
+ *
+ * RTE CRC Hash
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+
+/* Lookup tables for software implementation of CRC32C */
+static const uint32_t crc32c_tables[8][256] = {{
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+},
+{
+ 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
+ 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
+ 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
+ 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
+ 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
+ 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
+ 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
+ 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
+ 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
+ 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
+ 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
+ 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
+ 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
+ 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
+ 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
+ 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
+ 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
+ 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
+ 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
+ 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
+ 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
+ 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
+ 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
+ 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
+ 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
+ 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
+ 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
+ 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
+ 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
+ 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
+ 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
+ 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
+},
+{
+ 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
+ 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
+ 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
+ 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
+ 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
+ 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
+ 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
+ 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
+ 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
+ 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
+ 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
+ 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
+ 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
+ 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
+ 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
+ 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
+ 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
+ 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
+ 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
+ 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
+ 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
+ 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
+ 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
+ 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
+ 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
+ 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
+ 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
+ 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
+ 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
+ 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
+ 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
+ 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
+},
+{
+ 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
+ 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
+ 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
+ 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
+ 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
+ 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
+ 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
+ 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
+ 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
+ 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
+ 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
+ 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
+ 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
+ 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
+ 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
+ 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
+ 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
+ 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
+ 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
+ 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
+ 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
+ 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
+ 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
+ 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
+ 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
+ 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
+ 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
+ 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
+ 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
+ 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
+ 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
+ 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
+},
+{
+ 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
+ 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
+ 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
+ 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
+ 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
+ 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
+ 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
+ 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
+ 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
+ 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
+ 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
+ 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
+ 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
+ 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
+ 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
+ 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
+ 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
+ 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
+ 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
+ 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
+ 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
+ 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
+ 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
+ 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
+ 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
+ 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
+ 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
+ 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
+ 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
+ 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
+ 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
+ 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
+},
+{
+ 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
+ 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
+ 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
+ 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
+ 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
+ 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
+ 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
+ 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
+ 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
+ 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
+ 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
+ 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
+ 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
+ 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
+ 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
+ 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
+ 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
+ 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
+ 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
+ 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
+ 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
+ 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
+ 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
+ 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
+ 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
+ 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
+ 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
+ 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
+ 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
+ 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
+ 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
+ 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
+},
+{
+ 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
+ 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
+ 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
+ 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
+ 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
+ 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
+ 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
+ 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
+ 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
+ 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
+ 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
+ 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
+ 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
+ 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
+ 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
+ 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
+ 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
+ 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
+ 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
+ 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
+ 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
+ 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
+ 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
+ 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
+ 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
+ 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
+ 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
+ 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
+ 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
+ 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
+ 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
+ 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
+},
+{
+ 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
+ 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
+ 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
+ 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
+ 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
+ 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
+ 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
+ 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
+ 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
+ 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
+ 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
+ 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
+ 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
+ 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
+ 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
+ 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
+ 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
+ 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
+ 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
+ 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
+ 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
+ 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
+ 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
+ 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
+ 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
+ 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
+ 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
+ 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
+ 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
+ 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
+ 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
+ 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
+}};
+
+#define CRC32_UPD(crc, n) \
+	(crc32c_tables[(n)][(crc) & 0xFF] ^ \
+	 crc32c_tables[(n)-1][((crc) >> 8) & 0xFF])
+
+static inline uint32_t
+crc32c_1byte(uint8_t data, uint32_t init_val)
+{
+	uint32_t crc;
+	crc = init_val;
+	crc ^= data;
+
+	return crc32c_tables[0][crc & 0xff] ^ (crc >> 8);
+}
+
+static inline uint32_t
+crc32c_2bytes(uint16_t data, uint32_t init_val)
+{
+	uint32_t crc;
+	crc = init_val;
+	crc ^= data;
+
+	crc = CRC32_UPD(crc, 1) ^ (crc >> 16);
+
+	return crc;
+}
+
+static inline uint32_t
+crc32c_1word(uint32_t data, uint32_t init_val)
+{
+	uint32_t crc, term1, term2;
+	crc = init_val;
+	crc ^= data;
+
+	term1 = CRC32_UPD(crc, 3);
+	term2 = crc >> 16;
+	crc = term1 ^ CRC32_UPD(term2, 1);
+
+	return crc;
+}
+
+static inline uint32_t
+crc32c_2words(uint64_t data, uint32_t init_val)
+{
+	union {
+		uint64_t u64;
+		uint32_t u32[2];
+	} d;
+	d.u64 = data;
+
+	uint32_t crc, term1, term2;
+
+	crc = init_val;
+	crc ^= d.u32[0];
+
+	term1 = CRC32_UPD(crc, 7);
+	term2 = crc >> 16;
+	crc = term1 ^ CRC32_UPD(term2, 5);
+	term1 = CRC32_UPD(d.u32[1], 3);
+	term2 = d.u32[1] >> 16;
+	crc ^= term1 ^ CRC32_UPD(term2, 1);
+
+	return crc;
+}
+
+#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+static inline uint32_t
+crc32c_sse42_u8(uint8_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32b %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u16(uint16_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32w %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u32(uint32_t data, uint32_t init_val)
+{
+	__asm__ volatile(
+			"crc32l %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
+{
+	union {
+		uint32_t u32[2];
+		uint64_t u64;
+	} d;
+
+	d.u64 = data;
+	init_val = crc32c_sse42_u32(d.u32[0], init_val);
+	init_val = crc32c_sse42_u32(d.u32[1], init_val);
+	return init_val;
+}
+#endif
+
+#ifdef RTE_ARCH_X86_64
+static inline uint32_t
+crc32c_sse42_u64(uint64_t data, uint64_t init_val)
+{
+	__asm__ volatile(
+			"crc32q %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+#endif
+
+#define CRC32_SW            (1U << 0)
+#define CRC32_SSE42         (1U << 1)
+#define CRC32_x64           (1U << 2)
+#define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
+#define CRC32_ARM64         (1U << 3)
+
+static uint8_t crc32_alg = CRC32_SW;
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_crc_arm64.h"
+#else
+
+/**
+ * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param alg
+ *   An OR of following flags:
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
+ *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
+ *
+ */
+static inline void
+rte_hash_crc_set_alg(uint8_t alg)
+{
+	switch (alg) {
+#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+	case CRC32_SSE42_x64:
+		if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+			alg = CRC32_SSE42;
+	case CRC32_SSE42:
+		if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
+			alg = CRC32_SW;
+#endif
+	case CRC32_SW:
+		crc32_alg = alg;
+	default:
+		break;
+	}
+}
+
+/* Setting the best available algorithm */
+static inline void __attribute__((constructor))
+rte_hash_crc_init_alg(void)
+{
+	rte_hash_crc_set_alg(CRC32_SSE42_x64);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42))
+		return crc32c_sse42_u8(data, init_val);
+#endif
+
+	return crc32c_1byte(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 2 bytes value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
+{
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42))
+		return crc32c_sse42_u16(data, init_val);
+#endif
+
+	return crc32c_2bytes(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42))
+		return crc32c_sse42_u32(data, init_val);
+#endif
+
+	return crc32c_1word(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+#ifdef RTE_ARCH_X86_64
+	if (likely(crc32_alg == CRC32_SSE42_x64))
+		return crc32c_sse42_u64(data, init_val);
+#endif
+
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42))
+		return crc32c_sse42_u64_mimic(data, init_val);
+#endif
+
+	return crc32c_2words(data, init_val);
+}
+
+#endif
+
+/**
+ * Calculate CRC32 hash on user-supplied byte array.
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param data_len
+ *   How many bytes to use to calculate hash value.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val)
+{
+	unsigned i;
+	uintptr_t pd = (uintptr_t) data;
+
+	for (i = 0; i < data_len / 8; i++) {
+		init_val = rte_hash_crc_8byte(*(const uint64_t *)pd, init_val);
+		pd += 8;
+	}
+
+	if (data_len & 0x4) {
+		init_val = rte_hash_crc_4byte(*(const uint32_t *)pd, init_val);
+		pd += 4;
+	}
+
+	if (data_len & 0x2) {
+		init_val = rte_hash_crc_2byte(*(const uint16_t *)pd, init_val);
+		pd += 2;
+	}
+
+	if (data_len & 0x1)
+		init_val = rte_hash_crc_1byte(*(const uint8_t *)pd, init_val);
+
+	return init_val;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HASH_CRC_H_ */
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
new file mode 100644
index 00000000..4f25436e
--- /dev/null
+++ b/lib/librte_hash/rte_hash_version.map
@@ -0,0 +1,40 @@
+DPDK_2.0 {
+	global:
+
+	rte_fbk_hash_create;
+	rte_fbk_hash_find_existing;
+	rte_fbk_hash_free;
+	rte_hash_add_key;
+	rte_hash_add_key_with_hash;
+	rte_hash_create;
+	rte_hash_del_key;
+	rte_hash_del_key_with_hash;
+	rte_hash_find_existing;
+	rte_hash_free;
+	rte_hash_hash;
+	rte_hash_lookup;
+	rte_hash_lookup_bulk;
+	rte_hash_lookup_with_hash;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_hash_add_key_data;
+	rte_hash_add_key_with_hash_data;
+	rte_hash_iterate;
+	rte_hash_lookup_bulk_data;
+	rte_hash_lookup_data;
+	rte_hash_lookup_with_hash_data;
+	rte_hash_reset;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+	global:
+
+	rte_hash_set_cmp_func;
+
+} DPDK_2.1;
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
new file mode 100644
index 00000000..207478c2
--- /dev/null
+++ b/lib/librte_hash/rte_jhash.h
@@ -0,0 +1,410 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_JHASH_H
+#define _RTE_JHASH_H
+
+/**
+ * @file
+ *
+ * jhash functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+
+#include <rte_log.h>
+#include <rte_byteorder.h>
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
+ *
+ * $FreeBSD$
+ */
+
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
+/** @internal Internal function. NOTE: Arguments are modified. */
+#define __rte_jhash_mix(a, b, c) do { \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
+} while (0)
+
+/** The golden ratio: an arbitrary value. */
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k))))
+#else
+#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k))))
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
+
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc,
+		uint32_t *pb, unsigned check_align)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;
+
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = (const uint32_t *)key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (uint32_t *)((uintptr_t)key & (uintptr_t)~3);
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
+	if (!check_align || s == 0) {
+		while (length > 12) {
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;
+	*pb = b;
+}
+
+/**
+ * Same as rte_jhash, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	__rte_jhash_2hashes(key, length, pc, pb, 1);
+}
+
+/**
+ * Same as rte_jhash_32b, but takes two seeds and return two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash_32b.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
+
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	rte_jhash_2hashes(key, length, &initval, &initval2);
+
+	return initval;
+}
+
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;
+
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;
+}
+
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += RTE_JHASH_GOLDEN_RATIO + initval;
+	b += RTE_JHASH_GOLDEN_RATIO + initval;
+	c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+	__rte_jhash_final(a, b, c);
+
+	return c;
+}
+
+/**
+ * A special ultra-optimized versions that knows it is hashing exactly
+ * 3 words.
+ *
+ * @param a
+ *   First word to calculate hash of.
+ * @param b
+ *   Second word to calculate hash of.
+ * @param c
+ *   Third word to calculate hash of.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
+}
+
+/**
+ * A special ultra-optimized versions that knows it is hashing exactly
+ * 2 words.
+ *
+ * @param a
+ *   First word to calculate hash of.
+ * @param b
+ *   Second word to calculate hash of.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
+{
+	return __rte_jhash_3words(a + 8, b + 8, 8, initval);
+}
+
+/**
+ * A special ultra-optimized versions that knows it is hashing exactly
+ * 1 word.
+ *
+ * @param a
+ *   Word to calculate hash of.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_1word(uint32_t a, uint32_t initval)
+{
+	return __rte_jhash_3words(a + 4, 4, 4, initval);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_JHASH_H */
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
new file mode 100644
index 00000000..d98e98e7
--- /dev/null
+++ b/lib/librte_hash/rte_thash.h
@@ -0,0 +1,250 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Vladimir Medvedkin <medvedkinv@gmail.com>
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_THASH_H
+#define _RTE_THASH_H
+
+/**
+ * @file
+ *
+ * toeplitz hash functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Software implementation of the Toeplitz hash function used by RSS.
+ * Can be used either for packet distribution on single queue NIC
+ * or for simulating of RSS computation on specific NIC (for example
+ * after GRE header decapsulating)
+ */
+
+#include <stdint.h>
+#include <rte_byteorder.h>
+#include <rte_ip.h>
+
+#ifdef __SSE3__
+#include <rte_vect.h>
+#endif
+
+#ifdef __SSE3__
+/* Byte swap mask used for converting IPv6 address
+ * 4-byte chunks to CPU byte order
+ */
+static const __m128i rte_thash_ipv6_bswap_mask = {
+		0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL};
+#endif
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv4 header only
+ */
+#define RTE_THASH_V4_L3_LEN	((sizeof(struct rte_ipv4_tuple) -	\
+			sizeof(((struct rte_ipv4_tuple *)0)->sctp_tag)) / 4)
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv4 header +
+ * transport header
+ */
+#define RTE_THASH_V4_L4_LEN	 ((sizeof(struct rte_ipv4_tuple)) / 4)
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv6 header only
+ */
+#define RTE_THASH_V6_L3_LEN	((sizeof(struct rte_ipv6_tuple) -       \
+			sizeof(((struct rte_ipv6_tuple *)0)->sctp_tag)) / 4)
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv6 header +
+ * transport header
+ */
+#define RTE_THASH_V6_L4_LEN	((sizeof(struct rte_ipv6_tuple)) / 4)
+
+/**
+ * IPv4 tuple
+ * addresses and ports/sctp_tag have to be CPU byte order
+ */
+struct rte_ipv4_tuple {
+	uint32_t	src_addr;
+	uint32_t	dst_addr;
+	union {
+		struct {
+			uint16_t dport;
+			uint16_t sport;
+		};
+		uint32_t        sctp_tag;
+	};
+};
+
+/**
+ * IPv6 tuple
+ * Addresses have to be filled by rte_thash_load_v6_addr()
+ * ports/sctp_tag have to be CPU byte order
+ */
+struct rte_ipv6_tuple {
+	uint8_t		src_addr[16];
+	uint8_t		dst_addr[16];
+	union {
+		struct {
+			uint16_t dport;
+			uint16_t sport;
+		};
+		uint32_t        sctp_tag;
+	};
+};
+
+union rte_thash_tuple {
+	struct rte_ipv4_tuple	v4;
+	struct rte_ipv6_tuple	v6;
+#ifdef __SSE3__
+} __attribute__((aligned(XMM_SIZE)));
+#else
+};
+#endif
+
+/**
+ * Prepare special converted key to use with rte_softrss_be()
+ * @param orig
+ *   pointer to original RSS key
+ * @param targ
+ *   pointer to target RSS key
+ * @param len
+ *   RSS key length
+ */
+static inline void
+rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
+{
+	int i;
+
+	for (i = 0; i < (len >> 2); i++)
+		targ[i] = rte_be_to_cpu_32(orig[i]);
+}
+
+/**
+ * Prepare and load IPv6 addresses (src and dst)
+ * into target tuple
+ * @param orig
+ *   Pointer to ipv6 header of the original packet
+ * @param targ
+ *   Pointer to rte_ipv6_tuple structure
+ */
+static inline void
+rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ)
+{
+#ifdef __SSE3__
+	__m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr);
+	*(__m128i *)targ->v6.src_addr =
+			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+	ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr);
+	*(__m128i *)targ->v6.dst_addr =
+			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+#else
+	int i;
+	for (i = 0; i < 4; i++) {
+		*((uint32_t *)targ->v6.src_addr + i) =
+			rte_be_to_cpu_32(*((const uint32_t *)orig->src_addr + i));
+		*((uint32_t *)targ->v6.dst_addr + i) =
+			rte_be_to_cpu_32(*((const uint32_t *)orig->dst_addr + i));
+	}
+#endif
+}
+
+/**
+ * Generic implementation. Can be used with original rss_key
+ * @param input_tuple
+ *   Pointer to input tuple
+ * @param input_len
+ *   Length of input_tuple in 4-bytes chunks
+ * @param rss_key
+ *   Pointer to RSS hash key.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_softrss(uint32_t *input_tuple, uint32_t input_len,
+		const uint8_t *rss_key)
+{
+	uint32_t i, j, ret = 0;
+
+	for (j = 0; j < input_len; j++) {
+		for (i = 0; i < 32; i++) {
+			if (input_tuple[j] & (1 << (31 - i))) {
+				ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << i |
+					(uint32_t)((uint64_t)(rte_cpu_to_be_32(((const uint32_t *)rss_key)[j + 1])) >>
+					(32 - i));
+			}
+		}
+	}
+	return ret;
+}
+
+/**
+ * Optimized implementation.
+ * If you want the calculated hash value matches NIC RSS value
+ * you have to use special converted key with rte_convert_rss_key() fn.
+ * @param input_tuple
+ *   Pointer to input tuple
+ * @param input_len
+ *   Length of input_tuple in 4-bytes chunks
+ * @param *rss_key
+ *   Pointer to RSS hash key.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
+		const uint8_t *rss_key)
+{
+	uint32_t i, j, ret = 0;
+
+	for (j = 0; j < input_len; j++) {
+		for (i = 0; i < 32; i++) {
+			if (input_tuple[j] & (1 << (31 - i))) {
+				ret ^= ((const uint32_t *)rss_key)[j] << i |
+					(uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (32 - i));
+			}
+		}
+	}
+	return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_THASH_H */
diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile
new file mode 100644
index 00000000..9d06780d
--- /dev/null
+++ b/lib/librte_ip_frag/Makefile
@@ -0,0 +1,59 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ip_frag.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_ipfrag_version.map
+
+LIBABIVER := 1
+
+#source files
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_fragmentation.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_reassembly.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ip_frag_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
+
+
+# this library depends on rte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h
new file mode 100644
index 00000000..cde6ed4a
--- /dev/null
+++ b/lib/librte_ip_frag/ip_frag_common.h
@@ -0,0 +1,169 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_FRAG_COMMON_H_
+#define _IP_FRAG_COMMON_H_
+
+#include "rte_ip_frag.h"
+
+/* logging macros. */
+#ifdef RTE_LIBRTE_IP_FRAG_DEBUG
+
+#define	IP_FRAG_LOG(lvl, fmt, args...)	RTE_LOG(lvl, USER1, fmt, ##args)
+
+#define	IP_FRAG_ASSERT(exp)					\
+if (!(exp))	{							\
+	rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n",	\
+		__func__, __LINE__);					\
+}
+#else
+#define	IP_FRAG_LOG(lvl, fmt, args...)	do {} while(0)
+#define IP_FRAG_ASSERT(exp)	do {} while (0)
+#endif /* IP_FRAG_DEBUG */
+
+#define IPV4_KEYLEN 1
+#define IPV6_KEYLEN 4
+
+/* helper macros */
+#define	IP_FRAG_MBUF2DR(dr, mb)	((dr)->row[(dr)->cnt++] = (mb))
+
+#define IPv6_KEY_BYTES(key) \
+	(key)[0], (key)[1], (key)[2], (key)[3]
+#define IPv6_KEY_BYTES_FMT \
+	"%08" PRIx64 "%08" PRIx64 "%08" PRIx64 "%08" PRIx64
+
+/* internal functions declarations */
+struct rte_mbuf * ip_frag_process(struct ip_frag_pkt *fp,
+		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb,
+		uint16_t ofs, uint16_t len, uint16_t more_frags);
+
+struct ip_frag_pkt * ip_frag_find(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		const struct ip_frag_key *key, uint64_t tms);
+
+struct ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct ip_frag_pkt **free, struct ip_frag_pkt **stale);
+
+/* these functions need to be declared here as ip_frag_process relies on them */
+struct rte_mbuf * ipv4_frag_reassemble(const struct ip_frag_pkt *fp);
+struct rte_mbuf * ipv6_frag_reassemble(const struct ip_frag_pkt *fp);
+
+
+
+/*
+ * misc frag key functions
+ */
+
+/* check if key is empty */
+static inline int
+ip_frag_key_is_empty(const struct ip_frag_key * key)
+{
+	uint32_t i;
+	for (i = 0; i < RTE_MIN(key->key_len, RTE_DIM(key->src_dst)); i++)
+		if (key->src_dst[i] != 0)
+			return 0;
+	return 1;
+}
+
+/* empty the key */
+static inline void
+ip_frag_key_invalidate(struct ip_frag_key * key)
+{
+	uint32_t i;
+	for (i = 0; i < key->key_len; i++)
+		key->src_dst[i] = 0;
+}
+
+/* compare two keys */
+static inline int
+ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2)
+{
+	uint32_t i, val;
+	val = k1->id ^ k2->id;
+	for (i = 0; i < k1->key_len; i++)
+		val |= k1->src_dst[i] ^ k2->src_dst[i];
+	return val;
+}
+
+/*
+ * misc fragment functions
+ */
+
+/* put fragment on death row */
+static inline void
+ip_frag_free(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr)
+{
+	uint32_t i, k;
+
+	k = dr->cnt;
+	for (i = 0; i != fp->last_idx; i++) {
+		if (fp->frags[i].mb != NULL) {
+			dr->row[k++] = fp->frags[i].mb;
+			fp->frags[i].mb = NULL;
+		}
+	}
+
+	fp->last_idx = 0;
+	dr->cnt = k;
+}
+
+/* if key is empty, mark key as in use */
+static inline void
+ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct  ip_frag_pkt *fp)
+{
+	if (ip_frag_key_is_empty(&fp->key)) {
+		TAILQ_REMOVE(&tbl->lru, fp, lru);
+		tbl->use_entries--;
+	}
+}
+
+/* reset the fragment */
+static inline void
+ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
+{
+	static const struct ip_frag zero_frag = {
+		.ofs = 0,
+		.len = 0,
+		.mb = NULL,
+	};
+
+	fp->start = tms;
+	fp->total_size = UINT32_MAX;
+	fp->frag_size = 0;
+	fp->last_idx = IP_MIN_FRAG_NUM;
+	fp->frags[IP_LAST_FRAG_IDX] = zero_frag;
+	fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
+}
+
+#endif /* _IP_FRAG_COMMON_H_ */
diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c
new file mode 100644
index 00000000..b679ff43
--- /dev/null
+++ b/lib/librte_ip_frag/ip_frag_internal.c
@@ -0,0 +1,418 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <rte_jhash.h>
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+#include "ip_frag_common.h"
+
+#define	PRIME_VALUE	0xeaad8405
+
+#define	IP_FRAG_TBL_POS(tbl, sig)	\
+	((tbl)->pkt + ((sig) & (tbl)->entry_mask))
+
+#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
+#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	((s)->f += (v))
+#else
+#define	IP_FRAG_TBL_STAT_UPDATE(s, f, v)	do {} while (0)
+#endif /* IP_FRAG_TBL_STAT */
+
+/* local frag table helper functions */
+static inline void
+ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct ip_frag_pkt *fp)
+{
+	ip_frag_free(fp, dr);
+	ip_frag_key_invalidate(&fp->key);
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	tbl->use_entries--;
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
+}
+
+static inline void
+ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl,  struct ip_frag_pkt *fp,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	fp->key = key[0];
+	ip_frag_reset(fp, tms);
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
+	tbl->use_entries++;
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1);
+}
+
+static inline void
+ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	struct ip_frag_pkt *fp, uint64_t tms)
+{
+	ip_frag_free(fp, dr);
+	ip_frag_reset(fp, tms);
+	TAILQ_REMOVE(&tbl->lru, fp, lru);
+	TAILQ_INSERT_TAIL(&tbl->lru, fp, lru);
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1);
+}
+
+
+static inline void
+ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	uint32_t v;
+	const uint32_t *p;
+
+	p = (const uint32_t *)&key->src_dst;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
+	v = rte_hash_crc_4byte(p[1], v);
+	v = rte_hash_crc_4byte(key->id, v);
+#else
+
+	v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+	*v1 =  v;
+	*v2 = (v << 7) + (v >> 14);
+}
+
+static inline void
+ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
+{
+	uint32_t v;
+	const uint32_t *p;
+
+	p = (const uint32_t *) &key->src_dst;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+	v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
+	v = rte_hash_crc_4byte(p[1], v);
+	v = rte_hash_crc_4byte(p[2], v);
+	v = rte_hash_crc_4byte(p[3], v);
+	v = rte_hash_crc_4byte(p[4], v);
+	v = rte_hash_crc_4byte(p[5], v);
+	v = rte_hash_crc_4byte(p[6], v);
+	v = rte_hash_crc_4byte(p[7], v);
+	v = rte_hash_crc_4byte(key->id, v);
+#else
+
+	v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE);
+	v = rte_jhash_3words(p[3], p[4], p[5], v);
+	v = rte_jhash_3words(p[6], p[7], key->id, v);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+	*v1 =  v;
+	*v2 = (v << 7) + (v >> 14);
+}
+
+struct rte_mbuf *
+ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
+	struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
+{
+	uint32_t idx;
+
+	fp->frag_size += len;
+
+	/* this is the first fragment. */
+	if (ofs == 0) {
+		idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
+				IP_FIRST_FRAG_IDX : UINT32_MAX;
+
+	/* this is the last fragment. */
+	} else if (more_frags == 0) {
+		fp->total_size = ofs + len;
+		idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
+				IP_LAST_FRAG_IDX : UINT32_MAX;
+
+	/* this is the intermediate fragment. */
+	} else if ((idx = fp->last_idx) <
+		sizeof (fp->frags) / sizeof (fp->frags[0])) {
+		fp->last_idx++;
+	}
+
+	/*
+	 * errorneous packet: either exceeed max allowed number of fragments,
+	 * or duplicate first/last fragment encountered.
+	 */
+	if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
+
+		/* report an error. */
+		if (fp->key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, fp->key.src_dst[0], fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+
+		/* free all fragments, invalidate the entry. */
+		ip_frag_free(fp, dr);
+		ip_frag_key_invalidate(&fp->key);
+		IP_FRAG_MBUF2DR(dr, mb);
+
+		return NULL;
+	}
+
+	fp->frags[idx].ofs = ofs;
+	fp->frags[idx].len = len;
+	fp->frags[idx].mb = mb;
+
+	mb = NULL;
+
+	/* not all fragments are collected yet. */
+	if (likely (fp->frag_size < fp->total_size)) {
+		return mb;
+
+	/* if we collected all fragments, then try to reassemble. */
+	} else if (fp->frag_size == fp->total_size &&
+			fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
+		if (fp->key.key_len == IPV4_KEYLEN)
+			mb = ipv4_frag_reassemble(fp);
+		else
+			mb = ipv6_frag_reassemble(fp);
+	}
+
+	/* errorenous set of fragments. */
+	if (mb == NULL) {
+
+		/* report an error. */
+		if (fp->key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, fp->key.src_dst[0], fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n"
+				"ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, "
+				"total_size: %u, frag_size: %u, last_idx: %u\n"
+				"first fragment: ofs: %u, len: %u\n"
+				"last fragment: ofs: %u, len: %u\n\n",
+				__func__, __LINE__,
+				fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id,
+				fp->total_size, fp->frag_size, fp->last_idx,
+				fp->frags[IP_FIRST_FRAG_IDX].ofs,
+				fp->frags[IP_FIRST_FRAG_IDX].len,
+				fp->frags[IP_LAST_FRAG_IDX].ofs,
+				fp->frags[IP_LAST_FRAG_IDX].len);
+
+		/* free associated resources. */
+		ip_frag_free(fp, dr);
+	}
+
+	/* we are done with that entry, invalidate it. */
+	ip_frag_key_invalidate(&fp->key);
+	return mb;
+}
+
+
+/*
+ * Find an entry in the table for the corresponding fragment.
+ * If such entry is not present, then allocate a new one.
+ * If the entry is stale, then free and reuse it.
+ */
+struct ip_frag_pkt *
+ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+	const struct ip_frag_key *key, uint64_t tms)
+{
+	struct ip_frag_pkt *pkt, *free, *stale, *lru;
+	uint64_t max_cycles;
+
+	/*
+	 * Actually the two line below are totally redundant.
+	 * they are here, just to make gcc 4.6 happy.
+	 */
+	free = NULL;
+	stale = NULL;
+	max_cycles = tbl->max_cycles;
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
+
+	if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
+
+		/*timed-out entry, free and invalidate it*/
+		if (stale != NULL) {
+			ip_frag_tbl_del(tbl, dr, stale);
+			free = stale;
+
+		/*
+		 * we found a free entry, check if we can use it.
+		 * If we run out of free entries in the table, then
+		 * check if we have a timed out entry to delete.
+		 */
+		} else if (free != NULL &&
+				tbl->max_entries <= tbl->use_entries) {
+			lru = TAILQ_FIRST(&tbl->lru);
+			if (max_cycles + lru->start < tms) {
+				ip_frag_tbl_del(tbl, dr, lru);
+			} else {
+				free = NULL;
+				IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
+					fail_nospace, 1);
+			}
+		}
+
+		/* found a free entry to reuse. */
+		if (free != NULL) {
+			ip_frag_tbl_add(tbl,  free, key, tms);
+			pkt = free;
+		}
+
+	/*
+	 * we found the flow, but it is already timed out,
+	 * so free associated resources, reposition it in the LRU list,
+	 * and reuse it.
+	 */
+	} else if (max_cycles + pkt->start < tms) {
+		ip_frag_tbl_reuse(tbl, dr, pkt, tms);
+	}
+
+	IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
+
+	tbl->last = pkt;
+	return pkt;
+}
+
+struct ip_frag_pkt *
+ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
+	const struct ip_frag_key *key, uint64_t tms,
+	struct ip_frag_pkt **free, struct ip_frag_pkt **stale)
+{
+	struct ip_frag_pkt *p1, *p2;
+	struct ip_frag_pkt *empty, *old;
+	uint64_t max_cycles;
+	uint32_t i, assoc, sig1, sig2;
+
+	empty = NULL;
+	old = NULL;
+
+	max_cycles = tbl->max_cycles;
+	assoc = tbl->bucket_entries;
+
+	if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0)
+		return tbl->last;
+
+	/* different hashing methods for IPv4 and IPv6 */
+	if (key->key_len == IPV4_KEYLEN)
+		ipv4_frag_hash(key, &sig1, &sig2);
+	else
+		ipv6_frag_hash(key, &sig1, &sig2);
+
+	p1 = IP_FRAG_TBL_POS(tbl, sig1);
+	p2 = IP_FRAG_TBL_POS(tbl, sig2);
+
+	for (i = 0; i != assoc; i++) {
+		if (p1->key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv6_frag_pkt line0: %p, index: %u from %u\n"
+			"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p1, i, assoc,
+			p1[i].key.src_dst[0], p1[i].key.id, p1[i].start);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv6_frag_pkt line0: %p, index: %u from %u\n"
+			"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p1, i, assoc,
+			IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);
+
+		if (ip_frag_key_cmp(key, &p1[i].key) == 0)
+			return p1 + i;
+		else if (ip_frag_key_is_empty(&p1[i].key))
+			empty = (empty == NULL) ? (p1 + i) : empty;
+		else if (max_cycles + p1[i].start < tms)
+			old = (old == NULL) ? (p1 + i) : old;
+
+		if (p2->key.key_len == IPV4_KEYLEN)
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv6_frag_pkt line1: %p, index: %u from %u\n"
+			"key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p2, i, assoc,
+			p2[i].key.src_dst[0], p2[i].key.id, p2[i].start);
+		else
+			IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+					"tbl: %p, max_entries: %u, use_entries: %u\n"
+					"ipv6_frag_pkt line1: %p, index: %u from %u\n"
+			"key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n",
+					__func__, __LINE__,
+					tbl, tbl->max_entries, tbl->use_entries,
+					p2, i, assoc,
+			IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);
+
+		if (ip_frag_key_cmp(key, &p2[i].key) == 0)
+			return p2 + i;
+		else if (ip_frag_key_is_empty(&p2[i].key))
+			empty = (empty == NULL) ?( p2 + i) : empty;
+		else if (max_cycles + p2[i].start < tms)
+			old = (old == NULL) ? (p2 + i) : old;
+	}
+
+	*free = empty;
+	*stale = old;
+	return NULL;
+}
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
new file mode 100644
index 00000000..92cedf2f
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -0,0 +1,363 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IP_FRAG_H_
+#define _RTE_IP_FRAG_H_
+
+/**
+ * @file
+ * RTE IP Fragmentation and Reassembly
+ *
+ * Implementation of IP packet fragmentation and reassembly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+
+struct rte_mbuf;
+
+enum {
+	IP_LAST_FRAG_IDX,    /**< index of last fragment */
+	IP_FIRST_FRAG_IDX,   /**< index of first fragment */
+	IP_MIN_FRAG_NUM,     /**< minimum number of fragments */
+	IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG,
+	/**< maximum number of fragments per packet */
+};
+
+/** @internal fragmented mbuf */
+struct ip_frag {
+	uint16_t ofs;          /**< offset into the packet */
+	uint16_t len;          /**< length of fragment */
+	struct rte_mbuf *mb;   /**< fragment mbuf */
+};
+
+/** @internal <src addr, dst_addr, id> to uniquely indetify fragmented datagram. */
+struct ip_frag_key {
+	uint64_t src_dst[4];      /**< src address, first 8 bytes used for IPv4 */
+	uint32_t id;           /**< dst address */
+	uint32_t key_len;      /**< src/dst key length */
+};
+
+/*
+ * @internal Fragmented packet to reassemble.
+ * First two entries in the frags[] array are for the last and first fragments.
+ */
+struct ip_frag_pkt {
+	TAILQ_ENTRY(ip_frag_pkt) lru;   /**< LRU list */
+	struct ip_frag_key key;           /**< fragmentation key */
+	uint64_t             start;       /**< creation timestamp */
+	uint32_t             total_size;  /**< expected reassembled size */
+	uint32_t             frag_size;   /**< size of fragments received */
+	uint32_t             last_idx;    /**< index of next entry to fill */
+	struct ip_frag       frags[IP_MAX_FRAG_NUM]; /**< fragments */
+} __rte_cache_aligned;
+
+#define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */
+
+/** mbuf death row (packets to be freed) */
+struct rte_ip_frag_death_row {
+	uint32_t cnt;          /**< number of mbufs currently on death row */
+	struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)];
+	/**< mbufs to be freed */
+};
+
+TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); /**< @internal fragments tailq */
+
+/** fragmentation table statistics */
+struct ip_frag_tbl_stat {
+	uint64_t find_num;      /**< total # of find/insert attempts. */
+	uint64_t add_num;       /**< # of add ops. */
+	uint64_t del_num;       /**< # of del ops. */
+	uint64_t reuse_num;     /**< # of reuse (del/add) ops. */
+	uint64_t fail_total;    /**< total # of add failures. */
+	uint64_t fail_nospace;  /**< # of 'no space' add failures. */
+} __rte_cache_aligned;
+
+/** fragmentation table */
+struct rte_ip_frag_tbl {
+	uint64_t             max_cycles;      /**< ttl for table entries. */
+	uint32_t             entry_mask;      /**< hash value mask. */
+	uint32_t             max_entries;     /**< max entries allowed. */
+	uint32_t             use_entries;     /**< entries in use. */
+	uint32_t             bucket_entries;  /**< hash assocaitivity. */
+	uint32_t             nb_entries;      /**< total size of the table. */
+	uint32_t             nb_buckets;      /**< num of associativity lines. */
+	struct ip_frag_pkt *last;         /**< last used entry. */
+	struct ip_pkt_list lru;           /**< LRU list for table entries. */
+	struct ip_frag_tbl_stat stat;     /**< statistics counters. */
+	struct ip_frag_pkt pkt[0];        /**< hash table. */
+};
+
+/** IPv6 fragment extension header */
+#define	RTE_IPV6_EHDR_MF_SHIFT			0
+#define	RTE_IPV6_EHDR_MF_MASK			1
+#define	RTE_IPV6_EHDR_FO_SHIFT			3
+#define	RTE_IPV6_EHDR_FO_MASK			(~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1))
+
+#define RTE_IPV6_FRAG_USED_MASK			\
+	(RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK)
+
+#define RTE_IPV6_GET_MF(x)				((x) & RTE_IPV6_EHDR_MF_MASK)
+#define RTE_IPV6_GET_FO(x)				((x) >> RTE_IPV6_EHDR_FO_SHIFT)
+
+#define RTE_IPV6_SET_FRAG_DATA(fo, mf)	\
+	(((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK))
+
+struct ipv6_extension_fragment {
+	uint8_t next_header;            /**< Next header type */
+	uint8_t reserved;               /**< Reserved */
+	uint16_t frag_data;             /**< All fragmentation data */
+	uint32_t id;                    /**< Packet ID */
+} __attribute__((__packed__));
+
+
+
+/*
+ * Create a new IP fragmentation table.
+ *
+ * @param bucket_num
+ *   Number of buckets in the hash table.
+ * @param bucket_entries
+ *   Number of entries per bucket (e.g. hash associativity).
+ *   Should be power of two.
+ * @param max_entries
+ *   Maximum number of entries that could be stored in the table.
+ *   The value should be less or equal then bucket_num * bucket_entries.
+ * @param max_cycles
+ *   Maximum TTL in cycles for each fragmented packet.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in the case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints.
+ * @return
+ *   The pointer to the new allocated fragmentation table, on success. NULL on error.
+ */
+struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num,
+		uint32_t bucket_entries,  uint32_t max_entries,
+		uint64_t max_cycles, int socket_id);
+
+/*
+ * Free allocated IP fragmentation table.
+ *
+ * @param btl
+ *   Fragmentation table to free.
+ */
+static inline void
+rte_ip_frag_table_destroy( struct rte_ip_frag_tbl *tbl)
+{
+	rte_free(tbl);
+}
+
+/**
+ * This function implements the fragmentation of IPv6 packets.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param nb_pkts_out
+ *   Number of fragments.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6
+ *   datagrams. This value includes the size of the IPv6 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * errno.
+ */
+int32_t
+rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out,
+		uint16_t mtu_size,
+		struct rte_mempool *pool_direct,
+		struct rte_mempool *pool_indirect);
+
+/*
+ * This function implements reassembly of fragmented IPv6 packets.
+ * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
+ *
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row to free buffers to
+ * @param mb
+ *   Incoming mbuf with IPv6 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPv6 header.
+ * @param frag_hdr
+ *   Pointer to the IPv6 fragment extension header.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occured.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
+		struct ipv6_extension_fragment *frag_hdr);
+
+/*
+ * Return a pointer to the packet's fragment header, if found.
+ * It only looks at the extension header that's right after the fixed IPv6
+ * header, and doesn't follow the whole chain of extension headers.
+ *
+ * @param hdr
+ *   Pointer to the IPv6 header.
+ * @return
+ *   Pointer to the IPv6 fragment extension header, or NULL if it's not
+ *   present.
+ */
+static inline struct ipv6_extension_fragment *
+rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
+{
+	if (hdr->proto == IPPROTO_FRAGMENT) {
+		return (struct ipv6_extension_fragment *) ++hdr;
+	}
+	else
+		return NULL;
+}
+
+/**
+ * IPv4 fragmentation.
+ *
+ * This function implements the fragmentation of IPv4 packets.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param nb_pkts_out
+ *   Number of fragments.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4
+ *   datagrams. This value includes the size of the IPv4 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * errno.
+ */
+int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
+			struct rte_mbuf **pkts_out,
+			uint16_t nb_pkts_out, uint16_t mtu_size,
+			struct rte_mempool *pool_direct,
+			struct rte_mempool *pool_indirect);
+
+/*
+ * This function implements reassembly of fragmented IPv4 packets.
+ * Incoming mbufs should have its l2_len/l3_len fields setup correclty.
+ *
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param dr
+ *   Death row to free buffers to
+ * @param mb
+ *   Incoming mbuf with IPv4 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV4 header inside the fragment.
+ * @return
+ *   Pointer to mbuf for reassebled packet, or NULL if:
+ *   - an error occured.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr,
+		struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr);
+
+/*
+ * Check if the IPv4 packet is fragmented
+ *
+ * @param hdr
+ *   IPv4 header of the packet
+ * @return
+ *   1 if fragmented, 0 if not fragmented
+ */
+static inline int
+rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr) {
+	uint16_t flag_offset, ip_flag, ip_ofs;
+
+	flag_offset = rte_be_to_cpu_16(hdr->fragment_offset);
+	ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
+	ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
+
+	return ip_flag != 0 || ip_ofs  != 0;
+}
+
+/*
+ * Free mbufs on a given death row.
+ *
+ * @param dr
+ *   Death row to free mbufs in.
+ * @param prefetch
+ *   How many buffers to prefetch before freeing.
+ */
+void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+		uint32_t prefetch);
+
+
+/*
+ * Dump fragmentation table statistics to file.
+ *
+ * @param f
+ *   File to dump statistics to
+ * @param tbl
+ *   Fragmentation table to dump statistics from
+ */
+void
+rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IP_FRAG_H_ */
diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c
new file mode 100644
index 00000000..6176ff4e
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ip_frag_common.c
@@ -0,0 +1,139 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <rte_memory.h>
+#include <rte_log.h>
+
+#include "ip_frag_common.h"
+
+#define	IP_FRAG_HASH_FNUM	2
+
+/* free mbufs from death row */
+void
+rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
+		uint32_t prefetch)
+{
+	uint32_t i, k, n;
+
+	k = RTE_MIN(prefetch, dr->cnt);
+	n = dr->cnt;
+
+	for (i = 0; i != k; i++)
+		rte_prefetch0(dr->row[i]);
+
+	for (i = 0; i != n - k; i++) {
+		rte_prefetch0(dr->row[i + k]);
+		rte_pktmbuf_free(dr->row[i]);
+	}
+
+	for (; i != n; i++)
+		rte_pktmbuf_free(dr->row[i]);
+
+	dr->cnt = 0;
+}
+
+/* create fragmentation table */
+struct rte_ip_frag_tbl *
+rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
+	uint32_t max_entries, uint64_t max_cycles, int socket_id)
+{
+	struct rte_ip_frag_tbl *tbl;
+	size_t sz;
+	uint64_t nb_entries;
+
+	nb_entries = rte_align32pow2(bucket_num);
+	nb_entries *= bucket_entries;
+	nb_entries *= IP_FRAG_HASH_FNUM;
+
+	/* check input parameters. */
+	if (rte_is_power_of_2(bucket_entries) == 0 ||
+			nb_entries > UINT32_MAX || nb_entries == 0 ||
+			nb_entries < max_entries) {
+		RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
+		return NULL;
+	}
+
+	sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]);
+	if ((tbl = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE,
+			socket_id)) == NULL) {
+		RTE_LOG(ERR, USER1,
+			"%s: allocation of %zu bytes at socket %d failed do\n",
+			__func__, sz, socket_id);
+		return NULL;
+	}
+
+	RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n",
+		__func__, sz, socket_id);
+
+	tbl->max_cycles = max_cycles;
+	tbl->max_entries = max_entries;
+	tbl->nb_entries = (uint32_t)nb_entries;
+	tbl->nb_buckets = bucket_num;
+	tbl->bucket_entries = bucket_entries;
+	tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries  - 1);
+
+	TAILQ_INIT(&(tbl->lru));
+	return tbl;
+}
+
+/* dump frag table statistics to file */
+void
+rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl)
+{
+	uint64_t fail_total, fail_nospace;
+
+	fail_total = tbl->stat.fail_total;
+	fail_nospace = tbl->stat.fail_nospace;
+
+	fprintf(f, "max entries:\t%u;\n"
+		"entries in use:\t%u;\n"
+		"finds/inserts:\t%" PRIu64 ";\n"
+		"entries added:\t%" PRIu64 ";\n"
+		"entries deleted by timeout:\t%" PRIu64 ";\n"
+		"entries reused by timeout:\t%" PRIu64 ";\n"
+		"total add failures:\t%" PRIu64 ";\n"
+		"add no-space failures:\t%" PRIu64 ";\n"
+		"add hash-collisions failures:\t%" PRIu64 ";\n",
+		tbl->max_entries,
+		tbl->use_entries,
+		tbl->stat.find_num,
+		tbl->stat.add_num,
+		tbl->stat.del_num,
+		tbl->stat.reuse_num,
+		fail_total,
+		fail_nospace,
+		fail_total - fail_nospace);
+}
diff --git a/lib/librte_ip_frag/rte_ipfrag_version.map b/lib/librte_ip_frag/rte_ipfrag_version.map
new file mode 100644
index 00000000..354fa082
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ipfrag_version.map
@@ -0,0 +1,13 @@
+DPDK_2.0 {
+	global:
+
+	rte_ip_frag_free_death_row;
+	rte_ip_frag_table_create;
+	rte_ip_frag_table_statistics_dump;
+	rte_ipv4_frag_reassemble_packet;
+	rte_ipv4_fragment_packet;
+	rte_ipv6_frag_reassemble_packet;
+	rte_ipv6_fragment_packet;
+
+	local: *;
+};
diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
new file mode 100644
index 00000000..a4ed9238
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
@@ -0,0 +1,209 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <errno.h>
+
+#include <rte_memcpy.h>
+#include <rte_mempool.h>
+#include <rte_debug.h>
+
+#include "ip_frag_common.h"
+
+/* Fragment Offset */
+#define	IPV4_HDR_DF_SHIFT			14
+#define	IPV4_HDR_MF_SHIFT			13
+#define	IPV4_HDR_FO_SHIFT			3
+
+#define	IPV4_HDR_DF_MASK			(1 << IPV4_HDR_DF_SHIFT)
+#define	IPV4_HDR_MF_MASK			(1 << IPV4_HDR_MF_SHIFT)
+
+#define	IPV4_HDR_FO_MASK			((1 << IPV4_HDR_FO_SHIFT) - 1)
+
+static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst,
+		const struct ipv4_hdr *src, uint16_t len, uint16_t fofs,
+		uint16_t dofs, uint32_t mf)
+{
+	rte_memcpy(dst, src, sizeof(*dst));
+	fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT));
+	fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT);
+	dst->fragment_offset = rte_cpu_to_be_16(fofs);
+	dst->total_length = rte_cpu_to_be_16(len);
+	dst->hdr_checksum = 0;
+}
+
+static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
+{
+	uint32_t i;
+	for (i = 0; i != num; i++)
+		rte_pktmbuf_free(mb[i]);
+}
+
+/**
+ * IPv4 fragmentation.
+ *
+ * This function implements the fragmentation of IPv4 packets.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4
+ *   datagrams. This value includes the size of the IPv4 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * <errno>.
+ */
+int32_t
+rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
+	struct rte_mbuf **pkts_out,
+	uint16_t nb_pkts_out,
+	uint16_t mtu_size,
+	struct rte_mempool *pool_direct,
+	struct rte_mempool *pool_indirect)
+{
+	struct rte_mbuf *in_seg = NULL;
+	struct ipv4_hdr *in_hdr;
+	uint32_t out_pkt_pos, in_seg_data_pos;
+	uint32_t more_in_segs;
+	uint16_t fragment_offset, flag_offset, frag_size;
+
+	frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr));
+
+	/* Fragment size should be a multiply of 8. */
+	IP_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0);
+
+	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *);
+	flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset);
+
+	/* If Don't Fragment flag is set */
+	if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0))
+		return -ENOTSUP;
+
+	/* Check that pkts_out is big enough to hold all fragments */
+	if (unlikely(frag_size * nb_pkts_out <
+	    (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv4_hdr))))
+		return -EINVAL;
+
+	in_seg = pkt_in;
+	in_seg_data_pos = sizeof(struct ipv4_hdr);
+	out_pkt_pos = 0;
+	fragment_offset = 0;
+
+	more_in_segs = 1;
+	while (likely(more_in_segs)) {
+		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
+		uint32_t more_out_segs;
+		struct ipv4_hdr *out_hdr;
+
+		/* Allocate direct buffer */
+		out_pkt = rte_pktmbuf_alloc(pool_direct);
+		if (unlikely(out_pkt == NULL)) {
+			__free_fragments(pkts_out, out_pkt_pos);
+			return -ENOMEM;
+		}
+
+		/* Reserve space for the IP header that will be built later */
+		out_pkt->data_len = sizeof(struct ipv4_hdr);
+		out_pkt->pkt_len = sizeof(struct ipv4_hdr);
+
+		out_seg_prev = out_pkt;
+		more_out_segs = 1;
+		while (likely(more_out_segs && more_in_segs)) {
+			struct rte_mbuf *out_seg = NULL;
+			uint32_t len;
+
+			/* Allocate indirect buffer */
+			out_seg = rte_pktmbuf_alloc(pool_indirect);
+			if (unlikely(out_seg == NULL)) {
+				rte_pktmbuf_free(out_pkt);
+				__free_fragments(pkts_out, out_pkt_pos);
+				return -ENOMEM;
+			}
+			out_seg_prev->next = out_seg;
+			out_seg_prev = out_seg;
+
+			/* Prepare indirect buffer */
+			rte_pktmbuf_attach(out_seg, in_seg);
+			len = mtu_size - out_pkt->pkt_len;
+			if (len > (in_seg->data_len - in_seg_data_pos)) {
+				len = in_seg->data_len - in_seg_data_pos;
+			}
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
+			out_seg->data_len = (uint16_t)len;
+			out_pkt->pkt_len = (uint16_t)(len +
+			    out_pkt->pkt_len);
+			out_pkt->nb_segs += 1;
+			in_seg_data_pos += len;
+
+			/* Current output packet (i.e. fragment) done ? */
+			if (unlikely(out_pkt->pkt_len >= mtu_size))
+				more_out_segs = 0;
+
+			/* Current input segment done ? */
+			if (unlikely(in_seg_data_pos == in_seg->data_len)) {
+				in_seg = in_seg->next;
+				in_seg_data_pos = 0;
+
+				if (unlikely(in_seg == NULL))
+					more_in_segs = 0;
+			}
+		}
+
+		/* Build the IP header */
+
+		out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv4_hdr *);
+
+		__fill_ipv4hdr_frag(out_hdr, in_hdr,
+		    (uint16_t)out_pkt->pkt_len,
+		    flag_offset, fragment_offset, more_in_segs);
+
+		fragment_offset = (uint16_t)(fragment_offset +
+		    out_pkt->pkt_len - sizeof(struct ipv4_hdr));
+
+		out_pkt->ol_flags |= PKT_TX_IP_CKSUM;
+		out_pkt->l3_len = sizeof(struct ipv4_hdr);
+
+		/* Write the fragment to the output list */
+		pkts_out[out_pkt_pos] = out_pkt;
+		out_pkt_pos ++;
+	}
+
+	return out_pkt_pos;
+}
diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c
new file mode 100644
index 00000000..26d07f9a
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -0,0 +1,184 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <rte_debug.h>
+
+#include "ip_frag_common.h"
+
+/*
+ * Reassemble fragments into one packet.
+ */
+struct rte_mbuf *
+ipv4_frag_reassemble(const struct ip_frag_pkt *fp)
+{
+	struct ipv4_hdr *ip_hdr;
+	struct rte_mbuf *m, *prev;
+	uint32_t i, n, ofs, first_len;
+
+	first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
+	n = fp->last_idx - 1;
+
+	/*start from the last fragment. */
+	m = fp->frags[IP_LAST_FRAG_IDX].mb;
+	ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
+
+	while (ofs != first_len) {
+
+		prev = m;
+
+		for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) {
+
+			/* previous fragment found. */
+			if(fp->frags[i].ofs + fp->frags[i].len == ofs) {
+
+				/* adjust start of the last fragment data. */
+				rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
+				rte_pktmbuf_chain(fp->frags[i].mb, m);
+
+				/* update our last fragment and offset. */
+				m = fp->frags[i].mb;
+				ofs = fp->frags[i].ofs;
+			}
+		}
+
+		/* error - hole in the packet. */
+		if (m == prev) {
+			return NULL;
+		}
+	}
+
+	/* chain with the first fragment. */
+	rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
+	rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+	m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+
+	/* update mbuf fields for reassembled packet. */
+	m->ol_flags |= PKT_TX_IP_CKSUM;
+
+	/* update ipv4 header for the reassmebled packet */
+	ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+
+	ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
+		m->l3_len));
+	ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
+		rte_cpu_to_be_16(IPV4_HDR_DF_FLAG));
+	ip_hdr->hdr_checksum = 0;
+
+	return m;
+}
+
+/*
+ * Process new mbuf with fragment of IPV4 packet.
+ * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty.
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param mb
+ *   Incoming mbuf with IPV4 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV4 header inside the fragment.
+ * @return
+ *   Pointer to mbuf for reassebled packet, or NULL if:
+ *   - an error occured.
+ *   - not all fragments of the packet are collected yet.
+ */
+struct rte_mbuf *
+rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
+		struct ipv4_hdr *ip_hdr)
+{
+	struct ip_frag_pkt *fp;
+	struct ip_frag_key key;
+	const unaligned_uint64_t *psd;
+	uint16_t ip_len;
+	uint16_t flag_offset, ip_ofs, ip_flag;
+
+	flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset);
+	ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
+	ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
+
+	psd = (unaligned_uint64_t *)&ip_hdr->src_addr;
+	/* use first 8 bytes only */
+	key.src_dst[0] = psd[0];
+	key.id = ip_hdr->packet_id;
+	key.key_len = IPV4_KEYLEN;
+
+	ip_ofs *= IPV4_HDR_OFFSET_UNITS;
+	ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) -
+		mb->l3_len);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p, tms: %" PRIu64
+		", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n"
+		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
+		"max_entries: %u, use_entries: %u\n\n",
+		__func__, __LINE__,
+		mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, ip_flag,
+		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
+		tbl->use_entries);
+
+	/* try to find/add entry into the fragment's table. */
+	if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return NULL;
+	}
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, fp->key.src_dst[0], fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+
+	/* process the fragmented packet. */
+	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
+	ip_frag_inuse(tbl, fp);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__, mb,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, fp->key.src_dst[0], fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+	return mb;
+}
diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
new file mode 100644
index 00000000..1e30004f
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
@@ -0,0 +1,207 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <errno.h>
+
+#include <rte_memcpy.h>
+
+#include "ip_frag_common.h"
+
+/**
+ * @file
+ * RTE IPv6 Fragmentation
+ *
+ * Implementation of IPv6 fragmentation.
+ *
+ */
+
+static inline void
+__fill_ipv6hdr_frag(struct ipv6_hdr *dst,
+		const struct ipv6_hdr *src, uint16_t len, uint16_t fofs,
+		uint32_t mf)
+{
+	struct ipv6_extension_fragment *fh;
+
+	rte_memcpy(dst, src, sizeof(*dst));
+	dst->payload_len = rte_cpu_to_be_16(len);
+	dst->proto = IPPROTO_FRAGMENT;
+
+	fh = (struct ipv6_extension_fragment *) ++dst;
+	fh->next_header = src->proto;
+	fh->reserved = 0;
+	fh->frag_data = rte_cpu_to_be_16(RTE_IPV6_SET_FRAG_DATA(fofs, mf));
+	fh->id = 0;
+}
+
+static inline void
+__free_fragments(struct rte_mbuf *mb[], uint32_t num)
+{
+	uint32_t i;
+	for (i = 0; i < num; i++)
+		rte_pktmbuf_free(mb[i]);
+}
+
+/**
+ * IPv6 fragmentation.
+ *
+ * This function implements the fragmentation of IPv6 packets.
+ *
+ * @param pkt_in
+ *   The input packet.
+ * @param pkts_out
+ *   Array storing the output fragments.
+ * @param mtu_size
+ *   Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6
+ *   datagrams. This value includes the size of the IPv6 header.
+ * @param pool_direct
+ *   MBUF pool used for allocating direct buffers for the output fragments.
+ * @param pool_indirect
+ *   MBUF pool used for allocating indirect buffers for the output fragments.
+ * @return
+ *   Upon successful completion - number of output fragments placed
+ *   in the pkts_out array.
+ *   Otherwise - (-1) * <errno>.
+ */
+int32_t
+rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
+	struct rte_mbuf **pkts_out,
+	uint16_t nb_pkts_out,
+	uint16_t mtu_size,
+	struct rte_mempool *pool_direct,
+	struct rte_mempool *pool_indirect)
+{
+	struct rte_mbuf *in_seg = NULL;
+	struct ipv6_hdr *in_hdr;
+	uint32_t out_pkt_pos, in_seg_data_pos;
+	uint32_t more_in_segs;
+	uint16_t fragment_offset, frag_size;
+
+	frag_size = (uint16_t)(mtu_size - sizeof(struct ipv6_hdr));
+
+	/* Fragment size should be a multiple of 8. */
+	IP_FRAG_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0);
+
+	/* Check that pkts_out is big enough to hold all fragments */
+	if (unlikely (frag_size * nb_pkts_out <
+	    (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv6_hdr))))
+		return -EINVAL;
+
+	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv6_hdr *);
+
+	in_seg = pkt_in;
+	in_seg_data_pos = sizeof(struct ipv6_hdr);
+	out_pkt_pos = 0;
+	fragment_offset = 0;
+
+	more_in_segs = 1;
+	while (likely(more_in_segs)) {
+		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
+		uint32_t more_out_segs;
+		struct ipv6_hdr *out_hdr;
+
+		/* Allocate direct buffer */
+		out_pkt = rte_pktmbuf_alloc(pool_direct);
+		if (unlikely(out_pkt == NULL)) {
+			__free_fragments(pkts_out, out_pkt_pos);
+			return -ENOMEM;
+		}
+
+		/* Reserve space for the IP header that will be built later */
+		out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
+		out_pkt->pkt_len  = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
+
+		out_seg_prev = out_pkt;
+		more_out_segs = 1;
+		while (likely(more_out_segs && more_in_segs)) {
+			struct rte_mbuf *out_seg = NULL;
+			uint32_t len;
+
+			/* Allocate indirect buffer */
+			out_seg = rte_pktmbuf_alloc(pool_indirect);
+			if (unlikely(out_seg == NULL)) {
+				rte_pktmbuf_free(out_pkt);
+				__free_fragments(pkts_out, out_pkt_pos);
+				return -ENOMEM;
+			}
+			out_seg_prev->next = out_seg;
+			out_seg_prev = out_seg;
+
+			/* Prepare indirect buffer */
+			rte_pktmbuf_attach(out_seg, in_seg);
+			len = mtu_size - out_pkt->pkt_len;
+			if (len > (in_seg->data_len - in_seg_data_pos)) {
+				len = in_seg->data_len - in_seg_data_pos;
+			}
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
+			out_seg->data_len = (uint16_t)len;
+			out_pkt->pkt_len = (uint16_t)(len +
+			    out_pkt->pkt_len);
+			out_pkt->nb_segs += 1;
+			in_seg_data_pos += len;
+
+			/* Current output packet (i.e. fragment) done ? */
+			if (unlikely(out_pkt->pkt_len >= mtu_size)) {
+				more_out_segs = 0;
+			}
+
+			/* Current input segment done ? */
+			if (unlikely(in_seg_data_pos == in_seg->data_len)) {
+				in_seg = in_seg->next;
+				in_seg_data_pos = 0;
+
+				if (unlikely(in_seg == NULL)) {
+					more_in_segs = 0;
+				}
+			}
+		}
+
+		/* Build the IP header */
+
+		out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv6_hdr *);
+
+		__fill_ipv6hdr_frag(out_hdr, in_hdr,
+		    (uint16_t) out_pkt->pkt_len - sizeof(struct ipv6_hdr),
+		    fragment_offset, more_in_segs);
+
+		fragment_offset = (uint16_t)(fragment_offset +
+		    out_pkt->pkt_len - sizeof(struct ipv6_hdr)
+			- sizeof(struct ipv6_extension_fragment));
+
+		/* Write the fragment to the output list */
+		pkts_out[out_pkt_pos] = out_pkt;
+		out_pkt_pos ++;
+	}
+
+	return out_pkt_pos;
+}
diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
new file mode 100644
index 00000000..d29cb1da
--- /dev/null
+++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
@@ -0,0 +1,225 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <rte_memcpy.h>
+
+#include "ip_frag_common.h"
+
+/**
+ * @file
+ * IPv6 reassemble
+ *
+ * Implementation of IPv6 reassembly.
+ *
+ */
+
+static inline void
+ip_frag_memmove(char *dst, char *src, int len)
+{
+	int i;
+
+	/* go backwards to make sure we don't overwrite anything important */
+	for (i = len - 1; i >= 0; i--)
+		dst[i] = src[i];
+}
+
+/*
+ * Reassemble fragments into one packet.
+ */
+struct rte_mbuf *
+ipv6_frag_reassemble(const struct ip_frag_pkt *fp)
+{
+	struct ipv6_hdr *ip_hdr;
+	struct ipv6_extension_fragment *frag_hdr;
+	struct rte_mbuf *m, *prev;
+	uint32_t i, n, ofs, first_len;
+	uint32_t last_len, move_len, payload_len;
+
+	first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
+	n = fp->last_idx - 1;
+
+	/*start from the last fragment. */
+	m = fp->frags[IP_LAST_FRAG_IDX].mb;
+	ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
+	last_len = fp->frags[IP_LAST_FRAG_IDX].len;
+
+	payload_len = ofs + last_len;
+
+	while (ofs != first_len) {
+
+		prev = m;
+
+		for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) {
+
+			/* previous fragment found. */
+			if (fp->frags[i].ofs + fp->frags[i].len == ofs) {
+
+				/* adjust start of the last fragment data. */
+				rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
+				rte_pktmbuf_chain(fp->frags[i].mb, m);
+
+				/* update our last fragment and offset. */
+				m = fp->frags[i].mb;
+				ofs = fp->frags[i].ofs;
+			}
+		}
+
+		/* error - hole in the packet. */
+		if (m == prev) {
+			return NULL;
+		}
+	}
+
+	/* chain with the first fragment. */
+	rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
+	rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+	m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+
+	/* update mbuf fields for reassembled packet. */
+	m->ol_flags |= PKT_TX_IP_CKSUM;
+
+	/* update ipv6 header for the reassembled datagram */
+	ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
+
+	ip_hdr->payload_len = rte_cpu_to_be_16(payload_len);
+
+	/*
+	 * remove fragmentation header. note that per RFC2460, we need to update
+	 * the last non-fragmentable header with the "next header" field to contain
+	 * type of the first fragmentable header, but we currently don't support
+	 * other headers, so we assume there are no other headers and thus update
+	 * the main IPv6 header instead.
+	 */
+	move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr);
+	frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1);
+	ip_hdr->proto = frag_hdr->next_header;
+
+	ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)),
+			rte_pktmbuf_mtod(m, char*), move_len);
+
+	rte_pktmbuf_adj(m, sizeof(*frag_hdr));
+
+	return m;
+}
+
+/*
+ * Process new mbuf with fragment of IPV6 datagram.
+ * Incoming mbuf should have its l2_len/l3_len fields setup correctly.
+ * @param tbl
+ *   Table where to lookup/add the fragmented packet.
+ * @param mb
+ *   Incoming mbuf with IPV6 fragment.
+ * @param tms
+ *   Fragment arrival timestamp.
+ * @param ip_hdr
+ *   Pointer to the IPV6 header.
+ * @param frag_hdr
+ *   Pointer to the IPV6 fragment extension header.
+ * @return
+ *   Pointer to mbuf for reassembled packet, or NULL if:
+ *   - an error occured.
+ *   - not all fragments of the packet are collected yet.
+ */
+#define MORE_FRAGS(x) (((x) & 0x100) >> 8)
+#define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3)
+struct rte_mbuf *
+rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
+		struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
+		struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
+{
+	struct ip_frag_pkt *fp;
+	struct ip_frag_key key;
+	uint16_t ip_len, ip_ofs;
+
+	rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16);
+	rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16);
+
+	key.id = frag_hdr->id;
+	key.key_len = IPV6_KEYLEN;
+
+	ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8;
+
+	/*
+	 * as per RFC2460, payload length contains all extension headers as well.
+	 * since we don't support anything but frag headers, this is what we remove
+	 * from the payload len.
+	 */
+	ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p, tms: %" PRIu64
+		", key: <" IPv6_KEY_BYTES_FMT ", %#x>, ofs: %u, len: %u, flags: %#x\n"
+		"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
+		"max_entries: %u, use_entries: %u\n\n",
+		__func__, __LINE__,
+		mb, tms, IPv6_KEY_BYTES(key.src_dst), key.id, ip_ofs, ip_len,
+		RTE_IPV6_GET_MF(frag_hdr->frag_data),
+		tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
+		tbl->use_entries);
+
+	/* try to find/add entry into the fragment's table. */
+	fp = ip_frag_find(tbl, dr, &key, tms);
+	if (fp == NULL) {
+		IP_FRAG_MBUF2DR(dr, mb);
+		return NULL;
+	}
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+
+	/* process the fragmented packet. */
+	mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len,
+			MORE_FRAGS(frag_hdr->frag_data));
+	ip_frag_inuse(tbl, fp);
+
+	IP_FRAG_LOG(DEBUG, "%s:%d:\n"
+		"mbuf: %p\n"
+		"tbl: %p, max_entries: %u, use_entries: %u\n"
+		"ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64
+		", total_size: %u, frag_size: %u, last_idx: %u\n\n",
+		__func__, __LINE__, mb,
+		tbl, tbl->max_entries, tbl->use_entries,
+		fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start,
+		fp->total_size, fp->frag_size, fp->last_idx);
+
+	return mb;
+}
diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
new file mode 100644
index 00000000..16defdba
--- /dev/null
+++ b/lib/librte_ivshmem/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ivshmem.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_ivshmem_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c
new file mode 100644
index 00000000..c8b332ce
--- /dev/null
+++ b/lib/librte_ivshmem/rte_ivshmem.c
@@ -0,0 +1,905 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_eal_memconfig.h>
+#include <rte_memory.h>
+#include <rte_ivshmem.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_spinlock.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+
+#include "rte_ivshmem.h"
+
+#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
+#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
+#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
+#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
+#define IVSHMEM_MAX_PAGES (1 << 12)
+#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
+#define METADATA_SIZE_ALIGNED \
+	(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
+#define GET_PAGEMAP_ADDR(in,addr,dlm,err)    \
+{                                      \
+	char *end;                         \
+	errno = 0;                         \
+	addr = strtoull((in), &end, 16);   \
+	if (errno != 0 || *end != (dlm)) { \
+		RTE_LOG(ERR, EAL, err);        \
+		goto error;                    \
+	}                                  \
+	(in) = end + 1;                    \
+}
+
+static int pagesz;
+
+struct memseg_cache_entry {
+	char filepath[PATH_MAX];
+	uint64_t offset;
+	uint64_t len;
+};
+
+struct ivshmem_config {
+	struct rte_ivshmem_metadata * metadata;
+	struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
+		/**< account for multiple files per segment case */
+	struct flock lock;
+	rte_spinlock_t sl;
+};
+
+static struct ivshmem_config
+ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
+
+static rte_spinlock_t global_cfg_sl;
+
+static struct ivshmem_config *
+get_config_by_name(const char * name)
+{
+	struct rte_ivshmem_metadata * config;
+	unsigned i;
+
+	for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
+		config = ivshmem_global_config[i].metadata;
+		if (config == NULL)
+			return NULL;
+		if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
+			return &ivshmem_global_config[i];
+	}
+
+	return NULL;
+}
+
+static int
+overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
+{
+	uint64_t start1, end1, start2, end2;
+
+	start1 = s1->addr_64;
+	end1 = s1->addr_64 + s1->len;
+	start2 = s2->addr_64;
+	end2 = s2->addr_64 + s2->len;
+
+	if (start1 >= start2 && start1 < end2)
+		return 1;
+	if (start2 >= start1 && start2 < end1)
+		return 1;
+
+	return 0;
+}
+
+static struct rte_memzone *
+get_memzone_by_addr(const void * addr)
+{
+	struct rte_memzone * tmp, * mz;
+	struct rte_mem_config * mcfg;
+	int i;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	mz = NULL;
+
+	/* find memzone for the ring */
+	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+		tmp = &mcfg->memzone[i];
+
+		if (tmp->addr_64 == (uint64_t) addr) {
+			mz = tmp;
+			break;
+		}
+	}
+
+	return mz;
+}
+
+static int
+entry_compare(const void * a, const void * b)
+{
+	const struct rte_ivshmem_metadata_entry * e1 =
+			(const struct rte_ivshmem_metadata_entry*) a;
+	const struct rte_ivshmem_metadata_entry * e2 =
+			(const struct rte_ivshmem_metadata_entry*) b;
+
+	/* move unallocated zones to the end */
+	if (e1->mz.addr == NULL && e2->mz.addr == NULL)
+		return 0;
+	if (e1->mz.addr == 0)
+		return 1;
+	if (e2->mz.addr == 0)
+		return -1;
+
+	return e1->mz.phys_addr > e2->mz.phys_addr;
+}
+
+/* fills hugepage cache entry for a given start virt_addr */
+static int
+get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
+{
+	uint64_t start_addr, end_addr;
+	char *start,*path_end;
+	char buf[PATH_MAX*2];
+	FILE *f;
+
+	start = NULL;
+	path_end = NULL;
+	start_addr = 0;
+
+	memset(e->filepath, 0, sizeof(e->filepath));
+
+	/* open /proc/self/maps */
+	f = fopen("/proc/self/maps", "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
+		return -1;
+	}
+
+	/* parse maps */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+
+		/* get endptr to end of start addr */
+		start = buf;
+
+		GET_PAGEMAP_ADDR(start,start_addr,'-',
+				"Cannot find start address in maps!\n");
+
+		/* if start address is bigger than our address, skip */
+		if (start_addr > virt_addr)
+			continue;
+
+		GET_PAGEMAP_ADDR(start,end_addr,' ',
+				"Cannot find end address in maps!\n");
+
+		/* if end address is less than our address, skip */
+		if (end_addr <= virt_addr)
+			continue;
+
+		/* find where the path starts */
+		start = strstr(start, "/");
+
+		if (start == NULL)
+			continue;
+
+		/* at this point, we know that this is our map.
+		 * now let's find the file */
+		path_end = strstr(start, "\n");
+		break;
+	}
+
+	if (path_end == NULL) {
+		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
+		goto error;
+	}
+
+	/* calculate offset and copy the file path */
+	snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);
+
+	e->offset = virt_addr - start_addr;
+
+	fclose(f);
+
+	return 0;
+error:
+	fclose(f);
+	return -1;
+}
+
+/*
+ * This is a complex function. What it does is the following:
+ *  1. Goes through metadata and gets list of hugepages involved
+ *  2. Sorts the hugepages by size (1G first)
+ *  3. Goes through metadata again and writes correct offsets
+ *  4. Goes through pages and finds out their filenames, offsets etc.
+ */
+static int
+build_config(struct rte_ivshmem_metadata * metadata)
+{
+	struct rte_ivshmem_metadata_entry * e_local;
+	struct memseg_cache_entry * ms_local;
+	struct rte_memseg pages[IVSHMEM_MAX_PAGES];
+	struct rte_ivshmem_metadata_entry *entry;
+	struct memseg_cache_entry * c_entry, * prev_entry;
+	struct ivshmem_config * config;
+	unsigned i, j, mz_iter, ms_iter;
+	uint64_t biggest_len;
+	int biggest_idx;
+
+	/* return error if we try to use an unknown config file */
+	config = get_config_by_name(metadata->name);
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
+		goto fail_e;
+	}
+
+	memset(pages, 0, sizeof(pages));
+
+	e_local = malloc(sizeof(config->metadata->entry));
+	if (e_local == NULL)
+		goto fail_e;
+	ms_local = malloc(sizeof(config->memseg_cache));
+	if (ms_local == NULL)
+		goto fail_ms;
+
+
+	/* make local copies before doing anything */
+	memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
+	memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));
+
+	qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
+			entry_compare);
+
+	/* first pass - collect all huge pages */
+	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
+
+		entry = &e_local[mz_iter];
+
+		uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
+				entry->mz.hugepage_sz);
+		uint64_t offset = entry->mz.addr_64 - start_addr;
+		uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
+				entry->mz.hugepage_sz);
+
+		if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
+			continue;
+
+		int start_page;
+
+		/* find first unused page - mz are phys_addr sorted so we don't have to
+		 * look out for holes */
+		for (i = 0; i < RTE_DIM(pages); i++) {
+
+			/* skip if we already have this page */
+			if (pages[i].addr_64 == start_addr) {
+				start_addr += entry->mz.hugepage_sz;
+				len -= entry->mz.hugepage_sz;
+				continue;
+			}
+			/* we found a new page */
+			else if (pages[i].addr_64 == 0) {
+				start_page = i;
+				break;
+			}
+		}
+		if (i == RTE_DIM(pages)) {
+			RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
+			goto fail;
+		}
+
+		/* populate however many pages the memzone has */
+		for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {
+
+			pages[i].addr_64 = start_addr;
+			pages[i].len = entry->mz.hugepage_sz;
+			start_addr += entry->mz.hugepage_sz;
+			len -= entry->mz.hugepage_sz;
+		}
+		/* if there's still length left */
+		if (len != 0) {
+			RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
+			goto fail;
+		}
+	}
+
+	/* second pass - sort pages by size */
+	for (i = 0; i < RTE_DIM(pages); i++) {
+
+		if (pages[i].addr == NULL)
+			break;
+
+		biggest_len = 0;
+		biggest_idx = -1;
+
+		/*
+		 * browse all entries starting at 'i', and find the
+		 * entry with the smallest addr
+		 */
+		for (j=i; j< RTE_DIM(pages); j++) {
+			if (pages[j].addr == NULL)
+					break;
+			if (biggest_len == 0 ||
+				pages[j].len > biggest_len) {
+				biggest_len = pages[j].len;
+				biggest_idx = j;
+			}
+		}
+
+		/* should not happen */
+		if (biggest_idx == -1) {
+			RTE_LOG(ERR, EAL, "Error sorting by size!\n");
+			goto fail;
+		}
+		if (i != (unsigned) biggest_idx) {
+			struct rte_memseg tmp;
+
+			memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));
+
+			/* we don't want to break contiguousness, so instead of just
+			 * swapping segments, we move all the preceding segments to the
+			 * right and then put the old segment @ biggest_idx in place of
+			 * segment @ i */
+			for (j = biggest_idx - 1; j >= i; j--) {
+				memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg));
+				memset(&pages[j], 0, sizeof(struct rte_memseg));
+				if (j == 0)
+					break;
+			}
+
+			/* put old biggest segment to its new place */
+			memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
+		}
+	}
+
+	/* third pass - write correct offsets */
+	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
+
+		uint64_t offset = 0;
+
+		entry = &e_local[mz_iter];
+
+		if (entry->mz.addr_64 == 0)
+			break;
+
+		/* find page for current memzone */
+		for (i = 0; i < RTE_DIM(pages); i++) {
+			/* we found our page */
+			if (entry->mz.addr_64 >= pages[i].addr_64 &&
+					entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
+				entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
+						offset;
+				break;
+			}
+			offset += pages[i].len;
+		}
+		if (i == RTE_DIM(pages)) {
+			RTE_LOG(ERR, EAL, "Page not found!\n");
+			goto fail;
+		}
+	}
+
+	ms_iter = 0;
+	prev_entry = NULL;
+
+	/* fourth pass - create proper memseg cache */
+	for (i = 0; i < RTE_DIM(pages) &&
+			ms_iter <= RTE_DIM(config->memseg_cache); i++) {
+		if (pages[i].addr_64 == 0)
+			break;
+
+
+		if (ms_iter == RTE_DIM(pages)) {
+			RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
+			goto fail;
+		}
+
+		c_entry = &ms_local[ms_iter];
+		c_entry->len = pages[i].len;
+
+		if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
+			goto fail;
+
+		/* if previous entry has the same filename and is contiguous,
+		 * clear current entry and increase previous entry's length
+		 */
+		if (prev_entry != NULL &&
+				strncmp(c_entry->filepath, prev_entry->filepath,
+				sizeof(c_entry->filepath)) == 0 &&
+				prev_entry->offset + prev_entry->len == c_entry->offset) {
+			prev_entry->len += pages[i].len;
+			memset(c_entry, 0, sizeof(struct memseg_cache_entry));
+		}
+		else {
+			prev_entry = c_entry;
+			ms_iter++;
+		}
+	}
+
+	/* update current configuration with new valid data */
+	memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
+	memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));
+
+	free(ms_local);
+	free(e_local);
+
+	return 0;
+fail:
+	free(ms_local);
+fail_ms:
+	free(e_local);
+fail_e:
+	return -1;
+}
+
+static int
+add_memzone_to_metadata(const struct rte_memzone * mz,
+		struct ivshmem_config * config)
+{
+	struct rte_ivshmem_metadata_entry * entry;
+	unsigned i, idx;
+	struct rte_mem_config *mcfg;
+
+	if (mz->len == 0) {
+		RTE_LOG(ERR, EAL, "Trying to add an empty memzone\n");
+		return -1;
+	}
+
+	rte_spinlock_lock(&config->sl);
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* it prevents the memzone being freed while we add it to the metadata */
+	rte_rwlock_write_lock(&mcfg->mlock);
+
+	/* find free slot in this config */
+	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
+		entry = &config->metadata->entry[i];
+
+		if (&entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
+			RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
+			goto fail;
+		}
+
+		/* if addr is zero, the memzone is probably free */
+		if (entry->mz.addr_64 == 0) {
+			RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
+					mz->name, mz->addr, config->metadata->name);
+			memcpy(&entry->mz, mz, sizeof(struct rte_memzone));
+
+			/* run config file parser */
+			if (build_config(config->metadata) < 0)
+				goto fail;
+
+			break;
+		}
+	}
+
+	/* if we reached the maximum, that means we have no place in config */
+	if (i == RTE_DIM(config->metadata->entry)) {
+		RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
+				config->metadata->name);
+		goto fail;
+	}
+
+	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
+	idx = idx / sizeof(struct rte_memzone);
+
+	/* mark the memzone not freeable */
+	mcfg->memzone[idx].ioremap_addr = mz->phys_addr;
+
+	rte_rwlock_write_unlock(&mcfg->mlock);
+	rte_spinlock_unlock(&config->sl);
+	return 0;
+fail:
+	rte_rwlock_write_unlock(&mcfg->mlock);
+	rte_spinlock_unlock(&config->sl);
+	return -1;
+}
+
+static int
+add_ring_to_metadata(const struct rte_ring * r,
+		struct ivshmem_config * config)
+{
+	struct rte_memzone * mz;
+
+	mz = get_memzone_by_addr(r);
+
+	if (!mz) {
+		RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
+		return -1;
+	}
+
+	return add_memzone_to_metadata(mz, config);
+}
+
+static int
+add_mempool_to_metadata(const struct rte_mempool * mp,
+		struct ivshmem_config * config)
+{
+	struct rte_memzone * mz;
+	int ret;
+
+	mz = get_memzone_by_addr(mp);
+	ret = 0;
+
+	if (!mz) {
+		RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
+		return -1;
+	}
+
+	/* mempool consists of memzone and ring */
+	ret = add_memzone_to_metadata(mz, config);
+	if (ret < 0)
+		return -1;
+
+	return add_ring_to_metadata(mp->ring, config);
+}
+
+int
+rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
+{
+	struct ivshmem_config * config;
+
+	if (name == NULL || r == NULL)
+		return -1;
+
+	config = get_config_by_name(name);
+
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+		return -1;
+	}
+
+	return add_ring_to_metadata(r, config);
+}
+
+int
+rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
+{
+	struct ivshmem_config * config;
+
+	if (name == NULL || mz == NULL)
+		return -1;
+
+	config = get_config_by_name(name);
+
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+		return -1;
+	}
+
+	return add_memzone_to_metadata(mz, config);
+}
+
+int
+rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
+{
+	struct ivshmem_config * config;
+
+	if (name == NULL || mp == NULL)
+		return -1;
+
+	config = get_config_by_name(name);
+
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+		return -1;
+	}
+
+	return add_mempool_to_metadata(mp, config);
+}
+
+static inline void
+ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
+{
+	snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
+}
+
+
+
+static inline
+void *ivshmem_metadata_create(const char *name, size_t size,
+		struct flock *lock)
+{
+	int retval, fd;
+	void *metadata_addr;
+	char pathname[PATH_MAX];
+
+	ivshmem_config_path(pathname, sizeof(pathname), name);
+
+	fd = open(pathname, O_RDWR | O_CREAT, 0660);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
+		return NULL;
+	}
+
+	size = METADATA_SIZE_ALIGNED;
+
+	retval = fcntl(fd, F_SETLK, lock);
+	if (retval < 0){
+		close(fd);
+		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
+				"process using it?\n", pathname);
+		return NULL;
+	}
+
+	retval = ftruncate(fd, size);
+	if (retval < 0){
+		close(fd);
+		RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
+		return NULL;
+	}
+
+	metadata_addr = mmap(NULL, size,
+				PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (metadata_addr == MAP_FAILED){
+		RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
+
+		/* we don't care if we can't unlock */
+		fcntl(fd, F_UNLCK, lock);
+		close(fd);
+
+		return NULL;
+	}
+
+	return metadata_addr;
+}
+
+int rte_ivshmem_metadata_create(const char *name)
+{
+	struct ivshmem_config * ivshmem_config;
+	unsigned index;
+
+	if (pagesz == 0)
+		pagesz = getpagesize();
+
+	if (name == NULL)
+		return -1;
+
+	rte_spinlock_lock(&global_cfg_sl);
+
+	for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) {
+		if (ivshmem_global_config[index].metadata == NULL) {
+			ivshmem_config = &ivshmem_global_config[index];
+			break;
+		}
+	}
+
+	if (index == RTE_DIM(ivshmem_global_config)) {
+		RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
+		"Maximum has been reached\n");
+		rte_spinlock_unlock(&global_cfg_sl);
+		return -1;
+	}
+
+	ivshmem_config->lock.l_type = F_WRLCK;
+	ivshmem_config->lock.l_whence = SEEK_SET;
+
+	ivshmem_config->lock.l_start = 0;
+	ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED;
+
+	ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *)
+			ivshmem_metadata_create(
+					name,
+					sizeof(struct rte_ivshmem_metadata),
+					&ivshmem_config->lock));
+
+	if (ivshmem_global_config[index].metadata == NULL) {
+		rte_spinlock_unlock(&global_cfg_sl);
+		return -1;
+	}
+
+	/* Metadata setup */
+	memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata));
+	ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC;
+	snprintf(ivshmem_config->metadata->name,
+			sizeof(ivshmem_config->metadata->name), "%s", name);
+
+	rte_spinlock_unlock(&global_cfg_sl);
+
+	return 0;
+}
+
+int
+rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
+{
+	const struct memseg_cache_entry * ms_cache, *entry;
+	struct ivshmem_config * config;
+	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
+	char cfg_file_path[PATH_MAX];
+	unsigned remaining_len, tmplen, iter;
+	uint64_t shared_mem_size, zero_size, total_size;
+
+	if (buffer == NULL || name == NULL)
+		return -1;
+
+	config = get_config_by_name(name);
+
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
+		return -1;
+	}
+
+	rte_spinlock_lock(&config->sl);
+
+	/* prepare metadata file path */
+	snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
+			config->metadata->name);
+
+	ms_cache = config->memseg_cache;
+
+	cmdline_ptr = cmdline;
+	remaining_len = sizeof(cmdline);
+
+	shared_mem_size = 0;
+	iter = 0;
+
+	while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) {
+
+		entry = &ms_cache[iter];
+
+		/* Offset and sizes within the current pathname */
+		tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+				entry->filepath, entry->offset, entry->len);
+
+		shared_mem_size += entry->len;
+
+		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+		remaining_len -= tmplen;
+
+		if (remaining_len == 0) {
+			RTE_LOG(ERR, EAL, "Command line too long!\n");
+			rte_spinlock_unlock(&config->sl);
+			return -1;
+		}
+
+		iter++;
+	}
+
+	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
+	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;
+
+	/* add /dev/zero to command-line to fill the space */
+	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+			"/dev/zero",
+			(uint64_t)0x0,
+			zero_size);
+
+	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+	remaining_len -= tmplen;
+
+	if (remaining_len == 0) {
+		RTE_LOG(ERR, EAL, "Command line too long!\n");
+		rte_spinlock_unlock(&config->sl);
+		return -1;
+	}
+
+	/* add metadata file to the end of command-line */
+	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+			cfg_file_path,
+			(uint64_t)0x0,
+			METADATA_SIZE_ALIGNED);
+
+	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+	remaining_len -= tmplen;
+
+	if (remaining_len == 0) {
+		RTE_LOG(ERR, EAL, "Command line too long!\n");
+		rte_spinlock_unlock(&config->sl);
+		return -1;
+	}
+
+	/* if current length of the command line is bigger than the buffer supplied
+	 * by the user, or if command-line is bigger than what IVSHMEM accepts */
+	if ((sizeof(cmdline) - remaining_len) > size) {
+		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
+		rte_spinlock_unlock(&config->sl);
+		return -1;
+	}
+	/* complete the command-line */
+	snprintf(buffer, size,
+			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
+			total_size >> 20,
+			cmdline);
+
+	rte_spinlock_unlock(&config->sl);
+
+	return 0;
+}
+
+void
+rte_ivshmem_metadata_dump(FILE *f, const char *name)
+{
+	unsigned i = 0;
+	struct ivshmem_config * config;
+	struct rte_ivshmem_metadata_entry *entry;
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+	uint64_t addr;
+	uint64_t end, hugepage_sz;
+	struct memseg_cache_entry e;
+#endif
+
+	if (name == NULL)
+		return;
+
+	/* return error if we try to use an unknown config file */
+	config = get_config_by_name(name);
+	if (config == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+		return;
+	}
+
+	rte_spinlock_lock(&config->sl);
+
+	entry = &config->metadata->entry[0];
+
+	while (entry->mz.addr != NULL && i < RTE_DIM(config->metadata->entry)) {
+
+		fprintf(f, "Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, "
+			"virt:%-15p, off:0x%-15lx\n",
+			i,
+			entry->mz.name,
+			entry->mz.phys_addr,
+			entry->mz.len,
+			entry->mz.addr,
+			entry->offset);
+		i++;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+		fprintf(f, "\tHugepage files:\n");
+
+		hugepage_sz = entry->mz.hugepage_sz;
+		addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
+		end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
+				hugepage_sz);
+
+		for (; addr < end; addr += hugepage_sz) {
+			memset(&e, 0, sizeof(e));
+
+			get_hugefile_by_virt_addr(addr, &e);
+
+			fprintf(f, "\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
+					addr, addr + hugepage_sz, e.offset, e.filepath);
+		}
+#endif
+		entry++;
+	}
+
+	rte_spinlock_unlock(&config->sl);
+}
diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h
new file mode 100644
index 00000000..a5d36d6b
--- /dev/null
+++ b/lib/librte_ivshmem/rte_ivshmem.h
@@ -0,0 +1,165 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_IVSHMEM_H_
+#define RTE_IVSHMEM_H_
+
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+
+/**
+ * @file
+ *
+ * The RTE IVSHMEM interface provides functions to create metadata files
+ * describing memory segments to be shared via QEMU IVSHMEM.
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IVSHMEM_MAGIC 0x0BADC0DE
+#define IVSHMEM_NAME_LEN 32
+
+/**
+ * Structure that holds IVSHMEM shared metadata entry.
+ */
+struct rte_ivshmem_metadata_entry {
+	struct rte_memzone mz;	/**< shared memzone */
+	uint64_t offset;	/**< offset of memzone within IVSHMEM device */
+};
+
+/**
+ * Structure that holds IVSHMEM metadata.
+ */
+struct rte_ivshmem_metadata {
+	int magic_number;				/**< magic number */
+	char name[IVSHMEM_NAME_LEN];	/**< name of the metadata file */
+	struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES];
+			/**< metadata entries */
+};
+
+/**
+ * Creates metadata file with a given name
+ *
+ * @param name
+ *  Name of metadata file to be created
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_create(const char * name);
+
+/**
+ * Adds memzone to a specific metadata file
+ *
+ * @param mz
+ *  Memzone to be added
+ * @param md_name
+ *  Name of metadata file for the memzone to be added to
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz,
+		const char * md_name);
+
+/**
+ * Adds a ring descriptor to a specific metadata file
+ *
+ * @param r
+ *  Ring descriptor to be added
+ * @param md_name
+ *  Name of metadata file for the ring to be added to
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_ring(const struct rte_ring * r,
+		const char * md_name);
+
+/**
+ * Adds a mempool to a specific metadata file
+ *
+ * @param mp
+ *  Mempool to be added
+ * @param md_name
+ *  Name of metadata file for the mempool to be added to
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp,
+		const char * md_name);
+
+
+/**
+ * Generates the QEMU command-line for IVSHMEM device for a given metadata file.
+ * This function is to be called after all the objects were added.
+ *
+ * @param buffer
+ *  Buffer to be filled with the command line arguments.
+ * @param size
+ *  Size of the buffer.
+ * @param name
+ *  Name of metadata file to generate QEMU command-line parameters for
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
+		const char *name);
+
+
+/**
+ * Dump all metadata entries from a given metadata file to the console.
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @name
+ *  Name of the metadata file to be dumped to console.
+ */
+void rte_ivshmem_metadata_dump(FILE *f, const char *name);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_IVSHMEM_H_ */
diff --git a/lib/librte_ivshmem/rte_ivshmem_version.map b/lib/librte_ivshmem/rte_ivshmem_version.map
new file mode 100644
index 00000000..5a393ddc
--- /dev/null
+++ b/lib/librte_ivshmem/rte_ivshmem_version.map
@@ -0,0 +1,12 @@
+DPDK_2.0 {
+	global:
+
+	rte_ivshmem_metadata_add_mempool;
+	rte_ivshmem_metadata_add_memzone;
+	rte_ivshmem_metadata_add_ring;
+	rte_ivshmem_metadata_cmdline_generate;
+	rte_ivshmem_metadata_create;
+	rte_ivshmem_metadata_dump;
+
+	local: *;
+};
diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile
new file mode 100644
index 00000000..136a448e
--- /dev/null
+++ b/lib/librte_jobstats/Makefile
@@ -0,0 +1,53 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2015 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_jobstats.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_jobstats_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_JOBSTATS) := rte_jobstats.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_JOBSTATS)-include := rte_jobstats.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_jobstats/rte_jobstats.c b/lib/librte_jobstats/rte_jobstats.c
new file mode 100644
index 00000000..2b42050c
--- /dev/null
+++ b/lib/librte_jobstats/rte_jobstats.c
@@ -0,0 +1,293 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <rte_errno.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_branch_prediction.h>
+
+#include "rte_jobstats.h"
+
+#define ADD_TIME_MIN_MAX(obj, type, value) do {      \
+	typeof(value) tmp = (value);                     \
+	(obj)->type ## _time += tmp;                     \
+	if (tmp < (obj)->min_ ## type ## _time)          \
+		(obj)->min_ ## type ## _time = tmp;          \
+	if (tmp > (obj)->max_ ## type ## _time)          \
+		(obj)->max_ ## type ## _time = tmp;          \
+} while (0)
+
+#define RESET_TIME_MIN_MAX(obj, type) do {           \
+	(obj)->type ## _time = 0;                        \
+	(obj)->min_ ## type ## _time = UINT64_MAX;       \
+	(obj)->max_ ## type ## _time = 0;                \
+} while (0)
+
+static inline uint64_t
+get_time(void)
+{
+	rte_rmb();
+	return rte_get_timer_cycles();
+}
+
+/* Those are steps used to adjust job period.
+ * Experiments show that for forwarding apps the up step must be less than down
+ * step to achieve optimal performance.
+ */
+#define JOB_UPDATE_STEP_UP    1
+#define JOB_UPDATE_STEP_DOWN  4
+
+/*
+ * Default update function that implements simple period adjustment.
+ */
+static void
+default_update_function(struct rte_jobstats *job, int64_t result)
+{
+	int64_t err = job->target - result;
+
+	/* Job is happy. Nothing to do */
+	if (err == 0)
+		return;
+
+	if (err > 0) {
+		if (job->period + JOB_UPDATE_STEP_UP < job->max_period)
+			job->period += JOB_UPDATE_STEP_UP;
+	} else {
+		if (job->min_period + JOB_UPDATE_STEP_DOWN < job->period)
+			job->period -= JOB_UPDATE_STEP_DOWN;
+	}
+}
+
+int
+rte_jobstats_context_init(struct rte_jobstats_context *ctx)
+{
+	if (ctx == NULL)
+		return -EINVAL;
+
+	/* Init only needed parameters. Zero out everything else. */
+	memset(ctx, 0, sizeof(struct rte_jobstats_context));
+
+	rte_jobstats_context_reset(ctx);
+
+	return 0;
+}
+
+void
+rte_jobstats_context_start(struct rte_jobstats_context *ctx)
+{
+	uint64_t now;
+
+	ctx->loop_executed_jobs = 0;
+
+	now = get_time();
+	ADD_TIME_MIN_MAX(ctx, management, now - ctx->state_time);
+	ctx->state_time = now;
+}
+
+void
+rte_jobstats_context_finish(struct rte_jobstats_context *ctx)
+{
+	uint64_t now;
+
+	if (likely(ctx->loop_executed_jobs))
+		ctx->loop_cnt++;
+
+	now = get_time();
+	ADD_TIME_MIN_MAX(ctx, management, now - ctx->state_time);
+	ctx->state_time = now;
+}
+
+void
+rte_jobstats_context_reset(struct rte_jobstats_context *ctx)
+{
+	RESET_TIME_MIN_MAX(ctx, exec);
+	RESET_TIME_MIN_MAX(ctx, management);
+	ctx->start_time = get_time();
+	ctx->state_time = ctx->start_time;
+	ctx->job_exec_cnt = 0;
+	ctx->loop_cnt = 0;
+}
+
+void
+rte_jobstats_set_target(struct rte_jobstats *job, int64_t target)
+{
+	job->target = target;
+}
+
+int
+rte_jobstats_start(struct rte_jobstats_context *ctx, struct rte_jobstats *job)
+{
+	uint64_t now;
+
+	/* Some sanity check. */
+	if (unlikely(ctx == NULL || job == NULL || job->context != NULL))
+		return -EINVAL;
+
+	/* Link job with context object. */
+	job->context = ctx;
+
+	now = get_time();
+	ADD_TIME_MIN_MAX(ctx, management, now - ctx->state_time);
+	ctx->state_time = now;
+
+	return 0;
+}
+
+int
+rte_jobstats_abort(struct rte_jobstats *job)
+{
+	struct rte_jobstats_context *ctx;
+	uint64_t now, exec_time;
+
+	/* Some sanity check. */
+	if (unlikely(job == NULL || job->context == NULL))
+		return -EINVAL;
+
+	ctx = job->context;
+	now = get_time();
+	exec_time = now - ctx->state_time;
+	ADD_TIME_MIN_MAX(ctx, management, exec_time);
+	ctx->state_time = now;
+	job->context = NULL;
+
+	return 0;
+}
+
+int
+rte_jobstats_finish(struct rte_jobstats *job, int64_t job_value)
+{
+	struct rte_jobstats_context *ctx;
+	uint64_t now, exec_time;
+	int need_update;
+
+	/* Some sanity check. */
+	if (unlikely(job == NULL || job->context == NULL))
+		return -EINVAL;
+
+	need_update = job->target != job_value;
+	/* Adjust period only if job is unhappy of its current period. */
+	if (need_update)
+		(*job->update_period_cb)(job, job_value);
+
+	ctx = job->context;
+
+	/* Update execution time is considered as runtime so get time after it is
+	 * executed. */
+	now = get_time();
+	exec_time = now - ctx->state_time;
+	ADD_TIME_MIN_MAX(job, exec, exec_time);
+	ADD_TIME_MIN_MAX(ctx, exec, exec_time);
+
+	ctx->state_time = now;
+
+	ctx->loop_executed_jobs++;
+	ctx->job_exec_cnt++;
+
+	job->exec_cnt++;
+	job->context = NULL;
+
+	return need_update;
+}
+
+void
+rte_jobstats_set_period(struct rte_jobstats *job, uint64_t period,
+		uint8_t saturate)
+{
+	if (saturate != 0) {
+		if (period < job->min_period)
+			period = job->min_period;
+		else if (period > job->max_period)
+			period = job->max_period;
+	}
+
+	job->period = period;
+}
+
+void
+rte_jobstats_set_min(struct rte_jobstats *job, uint64_t period)
+{
+	job->min_period = period;
+	if (job->period < period)
+		job->period = period;
+}
+
+void
+rte_jobstats_set_max(struct rte_jobstats *job, uint64_t period)
+{
+	job->max_period = period;
+	if (job->period > period)
+		job->period = period;
+}
+
+int
+rte_jobstats_init(struct rte_jobstats *job, const char *name,
+		uint64_t min_period, uint64_t max_period, uint64_t initial_period,
+		int64_t target)
+{
+	if (job == NULL)
+		return -EINVAL;
+
+	job->period = initial_period;
+	job->min_period = min_period;
+	job->max_period = max_period;
+	job->target = target;
+	job->update_period_cb = &default_update_function;
+	rte_jobstats_reset(job);
+	snprintf(job->name, RTE_DIM(job->name), "%s", name == NULL ? "" : name);
+	job->context = NULL;
+
+	return 0;
+}
+
+void
+rte_jobstats_set_update_period_function(struct rte_jobstats *job,
+		rte_job_update_period_cb_t update_period_cb)
+{
+	if (update_period_cb == NULL)
+		update_period_cb = default_update_function;
+
+	job->update_period_cb = update_period_cb;
+}
+
+void
+rte_jobstats_reset(struct rte_jobstats *job)
+{
+	RESET_TIME_MIN_MAX(job, exec);
+	job->exec_cnt = 0;
+}
diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h
new file mode 100644
index 00000000..c2b285f8
--- /dev/null
+++ b/lib/librte_jobstats/rte_jobstats.h
@@ -0,0 +1,336 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef JOBSTATS_H_
+#define JOBSTATS_H_
+
+#include <stdint.h>
+
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_JOBSTATS_NAMESIZE 32
+
+/* Forward declarations. */
+struct rte_jobstats_context;
+struct rte_jobstats;
+
+/**
+ * This function should calculate new period and set it using
+ * rte_jobstats_set_period() function. Time spent in this function will be
+ * added to job's runtime.
+ *
+ * @param job
+ *  The job data structure handler.
+ * @param job_result
+ *  Result of calling job callback.
+ */
+typedef void (*rte_job_update_period_cb_t)(struct rte_jobstats *job,
+		int64_t job_result);
+
+struct rte_jobstats {
+	uint64_t period;
+	/**< Estimated period of execution. */
+
+	uint64_t min_period;
+	/**< Minimum period. */
+
+	uint64_t max_period;
+	/**< Maximum period. */
+
+	int64_t target;
+	/**< Desired value for this job. */
+
+	rte_job_update_period_cb_t update_period_cb;
+	/**< Period update callback. */
+
+	uint64_t exec_time;
+	/**< Total time (sum) that this job was executing. */
+
+	uint64_t min_exec_time;
+	/**< Minimum execute time. */
+
+	uint64_t max_exec_time;
+	/**< Minimum execute time. */
+
+	uint64_t exec_cnt;
+	/**< Execute count. */
+
+	char name[RTE_JOBSTATS_NAMESIZE];
+	/**< Name of this job */
+
+	struct rte_jobstats_context *context;
+	/**< Job stats context object that is executing this job. */
+} __rte_cache_aligned;
+
+struct rte_jobstats_context {
+	/** Viariable holding time at different points:
+	 * -# loop start time if loop was started but no job executed yet.
+	 * -# job start time if job is currently executing.
+	 * -# job finish time if job finished its execution.
+	 * -# loop finish time if loop finished its execution. */
+	uint64_t state_time;
+
+	uint64_t loop_executed_jobs;
+	/**< Count of executed jobs in this loop. */
+
+	/* Statistics start. */
+
+	uint64_t exec_time;
+	/**< Total time taken to execute jobs, not including management time. */
+
+	uint64_t min_exec_time;
+	/**< Minimum loop execute time. */
+
+	uint64_t max_exec_time;
+	/**< Minimum loop execute time. */
+
+	/**
+	 * Sum of time that is not the execute time (ex: from job finish to next
+	 * job start).
+	 *
+	 * This time might be considered as overhead of library + job scheduling.
+	 */
+	uint64_t management_time;
+
+	uint64_t min_management_time;
+	/**< Minimum management time */
+
+	uint64_t max_management_time;
+	/**< Maximum management time */
+
+	uint64_t start_time;
+	/**< Time since last reset stats. */
+
+	uint64_t job_exec_cnt;
+	/**< Total count of executed jobs. */
+
+	uint64_t loop_cnt;
+	/**< Total count of executed loops with at least one executed job. */
+} __rte_cache_aligned;
+
+/**
+ * Initialize given context object with default values.
+ *
+ * @param ctx
+ *  Job stats context object to initialize.
+ *
+ * @return
+ *  0 on success
+ *  -EINVAL if *ctx* is NULL
+ */
+int
+rte_jobstats_context_init(struct rte_jobstats_context *ctx);
+
+/**
+ * Mark that new set of jobs start executing.
+ *
+ * @param ctx
+ *  Job stats context object.
+ */
+void
+rte_jobstats_context_start(struct rte_jobstats_context *ctx);
+
+/**
+ * Mark that there is no more jobs ready to execute in this turn. Calculate
+ * stats for this loop turn.
+ *
+ * @param ctx
+ *  Job stats context.
+ */
+void
+rte_jobstats_context_finish(struct rte_jobstats_context *ctx);
+
+/**
+ * Function resets job context statistics.
+ *
+ * @param ctx
+ *  Job stats context which statistics will be reset.
+ */
+void
+rte_jobstats_context_reset(struct rte_jobstats_context *ctx);
+
+/**
+ * Initialize given job stats object.
+ *
+ * @param job
+ *  Job object.
+ * @param name
+ *  Optional job name.
+ * @param min_period
+ *  Minimum period that this job can accept.
+ * @param max_period
+ *  Maximum period that this job can accept.
+ * @param initial_period
+ *  Initial period. It will be checked against *min_period* and *max_period*.
+ * @param target
+ *  Target value that this job try to achieve.
+ *
+ * @return
+ *  0 on success
+ *  -EINVAL if *job* is NULL
+ */
+int
+rte_jobstats_init(struct rte_jobstats *job, const char *name,
+		uint64_t min_period, uint64_t max_period, uint64_t initial_period,
+		int64_t target);
+
+/**
+ * Set job desired target value. Difference between target and job value
+ * value must be used to properly adjust job execute period value.
+ *
+ * @param job
+ *  The job object.
+ * @param target
+ *  New target.
+ */
+void
+rte_jobstats_set_target(struct rte_jobstats *job, int64_t target);
+
+/**
+ * Mark that *job* is starting of its execution in context of *ctx* object.
+ *
+ * @param ctx
+ *  Job stats context.
+ * @param job
+ *  Job object.
+ * @return
+ *  0 on success
+ *  -EINVAL if *ctx* or *job* is NULL or *job* is executing in another context
+ *  context already,
+ */
+int
+rte_jobstats_start(struct rte_jobstats_context *ctx, struct rte_jobstats *job);
+
+/**
+ * Mark that *job* finished its execution, but time of this work will be skipped
+ * and added to management time.
+ *
+ * @param job
+ *  Job object.
+ *
+ * @return
+ *  0 on success
+ *  -EINVAL if job is NULL or job was not started (it have no context).
+ */
+int
+rte_jobstats_abort(struct rte_jobstats *job);
+
+/**
+ * Mark that *job* finished its execution. Context in which it was executing
+ * will receive stat update. After this function call *job* object is ready to
+ * be executed in other context.
+ *
+ * @param job
+ *  Job object.
+ * @param job_value
+ *  Job value. Job should pass in this parameter a value that it try to optimize
+ *  for example the number of packets it processed.
+ *
+ * @return
+ *  0 if job's period was not updated (job target equals *job_value*)
+ *  1 if job's period was updated
+ *  -EINVAL if job is NULL or job was not started (it have no context).
+ */
+int
+rte_jobstats_finish(struct rte_jobstats *job, int64_t job_value);
+
+/**
+ * Set execute period of given job.
+ *
+ * @param job
+ *  The job object.
+ * @param period
+ *  New period value.
+ * @param saturate
+ *  If zero, skip period saturation to min, max range.
+ */
+void
+rte_jobstats_set_period(struct rte_jobstats *job, uint64_t period,
+		uint8_t saturate);
+/**
+ * Set minimum execute period of given job. Current period will be checked
+ * against new minimum value.
+ *
+ * @param job
+ *  The job object.
+ * @param period
+ *  New minimum period value.
+ */
+void
+rte_jobstats_set_min(struct rte_jobstats *job, uint64_t period);
+/**
+ * Set maximum execute period of given job. Current period will be checked
+ * against new maximum value.
+ *
+ * @param job
+ *  The job object.
+ * @param period
+ *  New maximum period value.
+ */
+void
+rte_jobstats_set_max(struct rte_jobstats *job, uint64_t period);
+
+/**
+ * Set update period callback that is invoked after job finish.
+ *
+ * If application wants to do more sophisticated calculations than default
+ * it can provide this handler.
+ *
+ * @param job
+ *  Job object.
+ * @param update_pedriod_cb
+ *  Callback to set. If NULL restore default update function.
+ */
+void
+rte_jobstats_set_update_period_function(struct rte_jobstats *job,
+		rte_job_update_period_cb_t update_period_cb);
+
+/**
+ * Function resets job statistics.
+ *
+ * @param job
+ *  Job which statistics will be reset.
+ */
+void
+rte_jobstats_reset(struct rte_jobstats *job);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* JOBSTATS_H_ */
diff --git a/lib/librte_jobstats/rte_jobstats_version.map b/lib/librte_jobstats/rte_jobstats_version.map
new file mode 100644
index 00000000..f8944143
--- /dev/null
+++ b/lib/librte_jobstats/rte_jobstats_version.map
@@ -0,0 +1,26 @@
+DPDK_2.0 {
+	global:
+
+	rte_jobstats_context_finish;
+	rte_jobstats_context_init;
+	rte_jobstats_context_reset;
+	rte_jobstats_context_start;
+	rte_jobstats_finish;
+	rte_jobstats_init;
+	rte_jobstats_reset;
+	rte_jobstats_set_max;
+	rte_jobstats_set_min;
+	rte_jobstats_set_period;
+	rte_jobstats_set_target;
+	rte_jobstats_set_update_period_function;
+	rte_jobstats_start;
+
+	local: *;
+};
+
+DPDK_16.04 {
+	global:
+
+	rte_jobstats_abort;
+
+} DPDK_2.0;
diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
new file mode 100644
index 00000000..1398164d
--- /dev/null
+++ b/lib/librte_kni/Makefile
@@ -0,0 +1,53 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_kni.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
+
+EXPORT_MAP := rte_kni_version.map
+
+LIBABIVER := 2
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_KNI) := rte_kni.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_KNI)-include := rte_kni.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_eal lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_ether
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
new file mode 100644
index 00000000..ea9baf4c
--- /dev/null
+++ b/lib/librte_kni/rte_kni.c
@@ -0,0 +1,712 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_EXEC_ENV_LINUXAPP
+#error "KNI is not supported"
+#endif
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_kni.h>
+#include <rte_memzone.h>
+#include <exec-env/rte_kni_common.h>
+#include "rte_kni_fifo.h"
+
+#define MAX_MBUF_BURST_NUM            32
+
+/* Maximum number of ring entries */
+#define KNI_FIFO_COUNT_MAX     1024
+#define KNI_FIFO_SIZE          (KNI_FIFO_COUNT_MAX * sizeof(void *) + \
+					sizeof(struct rte_kni_fifo))
+
+#define KNI_REQUEST_MBUF_NUM_MAX      32
+
+#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0)
+
+/**
+ * KNI context
+ */
+struct rte_kni {
+	char name[RTE_KNI_NAMESIZE];        /**< KNI interface name */
+	uint16_t group_id;                  /**< Group ID of KNI devices */
+	uint32_t slot_id;                   /**< KNI pool slot ID */
+	struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
+	unsigned mbuf_size;                 /**< mbuf size */
+
+	struct rte_kni_fifo *tx_q;          /**< TX queue */
+	struct rte_kni_fifo *rx_q;          /**< RX queue */
+	struct rte_kni_fifo *alloc_q;       /**< Allocated mbufs queue */
+	struct rte_kni_fifo *free_q;        /**< To be freed mbufs queue */
+
+	/* For request & response */
+	struct rte_kni_fifo *req_q;         /**< Request queue */
+	struct rte_kni_fifo *resp_q;        /**< Response queue */
+	void * sync_addr;                   /**< Req/Resp Mem address */
+
+	struct rte_kni_ops ops;             /**< operations for request */
+	uint8_t in_use : 1;                 /**< kni in use */
+};
+
+enum kni_ops_status {
+	KNI_REQ_NO_REGISTER = 0,
+	KNI_REQ_REGISTERED,
+};
+
+/**
+ * KNI memzone pool slot
+ */
+struct rte_kni_memzone_slot {
+	uint32_t id;
+	uint8_t in_use : 1;                    /**< slot in use */
+
+	/* Memzones */
+	const struct rte_memzone *m_ctx;       /**< KNI ctx */
+	const struct rte_memzone *m_tx_q;      /**< TX queue */
+	const struct rte_memzone *m_rx_q;      /**< RX queue */
+	const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
+	const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue */
+	const struct rte_memzone *m_req_q;     /**< Request queue */
+	const struct rte_memzone *m_resp_q;    /**< Response queue */
+	const struct rte_memzone *m_sync_addr;
+
+	/* Free linked list */
+	struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
+};
+
+/**
+ * KNI memzone pool
+ */
+struct rte_kni_memzone_pool {
+	uint8_t initialized : 1;            /**< Global KNI pool init flag */
+
+	uint32_t max_ifaces;                /**< Max. num of KNI ifaces */
+	struct rte_kni_memzone_slot *slots;        /**< Pool slots */
+	rte_spinlock_t mutex;               /**< alloc/relase mutex */
+
+	/* Free memzone slots linked-list */
+	struct rte_kni_memzone_slot *free;         /**< First empty slot */
+	struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
+};
+
+
+static void kni_free_mbufs(struct rte_kni *kni);
+static void kni_allocate_mbufs(struct rte_kni *kni);
+
+static volatile int kni_fd = -1;
+static struct rte_kni_memzone_pool kni_memzone_pool = {
+	.initialized = 0,
+};
+
+static const struct rte_memzone *
+kni_memzone_reserve(const char *name, size_t len, int socket_id,
+						unsigned flags)
+{
+	const struct rte_memzone *mz = rte_memzone_lookup(name);
+
+	if (mz == NULL)
+		mz = rte_memzone_reserve(name, len, socket_id, flags);
+
+	return mz;
+}
+
+/* Pool mgmt */
+static struct rte_kni_memzone_slot*
+kni_memzone_pool_alloc(void)
+{
+	struct rte_kni_memzone_slot *slot;
+
+	rte_spinlock_lock(&kni_memzone_pool.mutex);
+
+	if (!kni_memzone_pool.free) {
+		rte_spinlock_unlock(&kni_memzone_pool.mutex);
+		return NULL;
+	}
+
+	slot = kni_memzone_pool.free;
+	kni_memzone_pool.free = slot->next;
+	slot->in_use = 1;
+
+	if (!kni_memzone_pool.free)
+		kni_memzone_pool.free_tail = NULL;
+
+	rte_spinlock_unlock(&kni_memzone_pool.mutex);
+
+	return slot;
+}
+
+static void
+kni_memzone_pool_release(struct rte_kni_memzone_slot *slot)
+{
+	rte_spinlock_lock(&kni_memzone_pool.mutex);
+
+	if (kni_memzone_pool.free)
+		kni_memzone_pool.free_tail->next = slot;
+	else
+		kni_memzone_pool.free = slot;
+
+	kni_memzone_pool.free_tail = slot;
+	slot->next = NULL;
+	slot->in_use = 0;
+
+	rte_spinlock_unlock(&kni_memzone_pool.mutex);
+}
+
+
+/* Shall be called before any allocation happens */
+void
+rte_kni_init(unsigned int max_kni_ifaces)
+{
+	uint32_t i;
+	struct rte_kni_memzone_slot *it;
+	const struct rte_memzone *mz;
+#define OBJNAMSIZ 32
+	char obj_name[OBJNAMSIZ];
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+
+	/* Immediately return if KNI is already initialized */
+	if (kni_memzone_pool.initialized) {
+		RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
+		return;
+	}
+
+	if (max_kni_ifaces == 0) {
+		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
+							max_kni_ifaces);
+		rte_panic("Unable to initialize KNI\n");
+	}
+
+	/* Check FD and open */
+	if (kni_fd < 0) {
+		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
+		if (kni_fd < 0)
+			rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
+	}
+
+	/* Allocate slot objects */
+	kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
+					rte_malloc(NULL,
+					sizeof(struct rte_kni_memzone_slot) *
+					max_kni_ifaces,
+					0);
+	KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
+
+	/* Initialize general pool variables */
+	kni_memzone_pool.initialized = 1;
+	kni_memzone_pool.max_ifaces = max_kni_ifaces;
+	kni_memzone_pool.free = &kni_memzone_pool.slots[0];
+	rte_spinlock_init(&kni_memzone_pool.mutex);
+
+	/* Pre-allocate all memzones of all the slots; panic on error */
+	for (i = 0; i < max_kni_ifaces; i++) {
+
+		/* Recover current slot */
+		it = &kni_memzone_pool.slots[i];
+		it->id = i;
+
+		/* Allocate KNI context */
+		snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
+		mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
+					SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_ctx = mz;
+
+		/* TX RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_tx_q = mz;
+
+		/* RX RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_rx_q = mz;
+
+		/* ALLOC RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_alloc_q = mz;
+
+		/* FREE RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_free_q = mz;
+
+		/* Request RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_req_q = mz;
+
+		/* Response RING */
+		snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_resp_q = mz;
+
+		/* Req/Resp sync mem area */
+		snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
+		mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
+							SOCKET_ID_ANY, 0);
+		KNI_MEM_CHECK(mz == NULL);
+		it->m_sync_addr = mz;
+
+		if ((i+1) == max_kni_ifaces) {
+			it->next = NULL;
+			kni_memzone_pool.free_tail = it;
+		} else
+			it->next = &kni_memzone_pool.slots[i+1];
+	}
+
+	return;
+
+kni_fail:
+	rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n",
+			 max_kni_ifaces);
+}
+
+
+struct rte_kni *
+rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
+	      const struct rte_kni_conf *conf,
+	      struct rte_kni_ops *ops)
+{
+	int ret;
+	struct rte_kni_device_info dev_info;
+	struct rte_kni *ctx;
+	char intf_name[RTE_KNI_NAMESIZE];
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+	struct rte_kni_memzone_slot *slot = NULL;
+
+	if (!pktmbuf_pool || !conf || !conf->name[0])
+		return NULL;
+
+	/* Check if KNI subsystem has been initialized */
+	if (kni_memzone_pool.initialized != 1) {
+		RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n");
+		return NULL;
+	}
+
+	/* Get an available slot from the pool */
+	slot = kni_memzone_pool_alloc();
+	if (!slot) {
+		RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unusued ones.\n",
+			kni_memzone_pool.max_ifaces);
+		return NULL;
+	}
+
+	/* Recover ctx */
+	ctx = slot->m_ctx->addr;
+	snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
+
+	if (ctx->in_use) {
+		RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
+		return NULL;
+	}
+	memset(ctx, 0, sizeof(struct rte_kni));
+	if (ops)
+		memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops));
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	dev_info.bus = conf->addr.bus;
+	dev_info.devid = conf->addr.devid;
+	dev_info.function = conf->addr.function;
+	dev_info.vendor_id = conf->id.vendor_id;
+	dev_info.device_id = conf->id.device_id;
+	dev_info.core_id = conf->core_id;
+	dev_info.force_bind = conf->force_bind;
+	dev_info.group_id = conf->group_id;
+	dev_info.mbuf_size = conf->mbuf_size;
+
+	snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
+	snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name);
+
+	RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
+		dev_info.bus, dev_info.devid, dev_info.function,
+			dev_info.vendor_id, dev_info.device_id);
+	/* TX RING */
+	mz = slot->m_tx_q;
+	ctx->tx_q = mz->addr;
+	kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
+	dev_info.tx_phys = mz->phys_addr;
+
+	/* RX RING */
+	mz = slot->m_rx_q;
+	ctx->rx_q = mz->addr;
+	kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
+	dev_info.rx_phys = mz->phys_addr;
+
+	/* ALLOC RING */
+	mz = slot->m_alloc_q;
+	ctx->alloc_q = mz->addr;
+	kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
+	dev_info.alloc_phys = mz->phys_addr;
+
+	/* FREE RING */
+	mz = slot->m_free_q;
+	ctx->free_q = mz->addr;
+	kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
+	dev_info.free_phys = mz->phys_addr;
+
+	/* Request RING */
+	mz = slot->m_req_q;
+	ctx->req_q = mz->addr;
+	kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
+	dev_info.req_phys = mz->phys_addr;
+
+	/* Response RING */
+	mz = slot->m_resp_q;
+	ctx->resp_q = mz->addr;
+	kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
+	dev_info.resp_phys = mz->phys_addr;
+
+	/* Req/Resp sync mem area */
+	mz = slot->m_sync_addr;
+	ctx->sync_addr = mz->addr;
+	dev_info.sync_va = mz->addr;
+	dev_info.sync_phys = mz->phys_addr;
+
+
+	/* MBUF mempool */
+	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
+		pktmbuf_pool->name);
+	mz = rte_memzone_lookup(mz_name);
+	KNI_MEM_CHECK(mz == NULL);
+	dev_info.mbuf_va = mz->addr;
+	dev_info.mbuf_phys = mz->phys_addr;
+	ctx->pktmbuf_pool = pktmbuf_pool;
+	ctx->group_id = conf->group_id;
+	ctx->slot_id = slot->id;
+	ctx->mbuf_size = conf->mbuf_size;
+
+	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
+	KNI_MEM_CHECK(ret < 0);
+
+	ctx->in_use = 1;
+
+	/* Allocate mbufs and then put them into alloc_q */
+	kni_allocate_mbufs(ctx);
+
+	return ctx;
+
+kni_fail:
+	if (slot)
+		kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]);
+
+	return NULL;
+}
+
+static void
+kni_free_fifo(struct rte_kni_fifo *fifo)
+{
+	int ret;
+	struct rte_mbuf *pkt;
+
+	do {
+		ret = kni_fifo_get(fifo, (void **)&pkt, 1);
+		if (ret)
+			rte_pktmbuf_free(pkt);
+	} while (ret);
+}
+
+int
+rte_kni_release(struct rte_kni *kni)
+{
+	struct rte_kni_device_info dev_info;
+	uint32_t slot_id;
+
+	if (!kni || !kni->in_use)
+		return -1;
+
+	snprintf(dev_info.name, sizeof(dev_info.name), "%s", kni->name);
+	if (ioctl(kni_fd, RTE_KNI_IOCTL_RELEASE, &dev_info) < 0) {
+		RTE_LOG(ERR, KNI, "Fail to release kni device\n");
+		return -1;
+	}
+
+	/* mbufs in all fifo should be released, except request/response */
+	kni_free_fifo(kni->tx_q);
+	kni_free_fifo(kni->rx_q);
+	kni_free_fifo(kni->alloc_q);
+	kni_free_fifo(kni->free_q);
+
+	slot_id = kni->slot_id;
+
+	/* Memset the KNI struct */
+	memset(kni, 0, sizeof(struct rte_kni));
+
+	/* Release memzone */
+	if (slot_id > kni_memzone_pool.max_ifaces) {
+		rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n",
+			slot_id, kni_memzone_pool.max_ifaces);
+	}
+	kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]);
+
+	return 0;
+}
+
+int
+rte_kni_handle_request(struct rte_kni *kni)
+{
+	unsigned ret;
+	struct rte_kni_request *req;
+
+	if (kni == NULL)
+		return -1;
+
+	/* Get request mbuf */
+	ret = kni_fifo_get(kni->req_q, (void **)&req, 1);
+	if (ret != 1)
+		return 0; /* It is OK of can not getting the request mbuf */
+
+	if (req != kni->sync_addr) {
+		rte_panic("Wrong req pointer %p\n", req);
+	}
+
+	/* Analyze the request and call the relevant actions for it */
+	switch (req->req_id) {
+	case RTE_KNI_REQ_CHANGE_MTU: /* Change MTU */
+		if (kni->ops.change_mtu)
+			req->result = kni->ops.change_mtu(kni->ops.port_id,
+							req->new_mtu);
+		break;
+	case RTE_KNI_REQ_CFG_NETWORK_IF: /* Set network interface up/down */
+		if (kni->ops.config_network_if)
+			req->result = kni->ops.config_network_if(\
+					kni->ops.port_id, req->if_up);
+		break;
+	default:
+		RTE_LOG(ERR, KNI, "Unknown request id %u\n", req->req_id);
+		req->result = -EINVAL;
+		break;
+	}
+
+	/* Construct response mbuf and put it back to resp_q */
+	ret = kni_fifo_put(kni->resp_q, (void **)&req, 1);
+	if (ret != 1) {
+		RTE_LOG(ERR, KNI, "Fail to put the muf back to resp_q\n");
+		return -1; /* It is an error of can't putting the mbuf back */
+	}
+
+	return 0;
+}
+
+unsigned
+rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
+{
+	unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num);
+
+	/* Get mbufs from free_q and then free them */
+	kni_free_mbufs(kni);
+
+	return ret;
+}
+
+unsigned
+rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
+{
+	unsigned ret = kni_fifo_get(kni->tx_q, (void **)mbufs, num);
+
+	/* If buffers removed, allocate mbufs and then put them into alloc_q */
+	if (ret)
+		kni_allocate_mbufs(kni);
+
+	return ret;
+}
+
+static void
+kni_free_mbufs(struct rte_kni *kni)
+{
+	int i, ret;
+	struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];
+
+	ret = kni_fifo_get(kni->free_q, (void **)pkts, MAX_MBUF_BURST_NUM);
+	if (likely(ret > 0)) {
+		for (i = 0; i < ret; i++)
+			rte_pktmbuf_free(pkts[i]);
+	}
+}
+
+static void
+kni_allocate_mbufs(struct rte_kni *kni)
+{
+	int i, ret;
+	struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];
+
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) !=
+			 offsetof(struct rte_kni_mbuf, pool));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_addr) !=
+			 offsetof(struct rte_kni_mbuf, buf_addr));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
+			 offsetof(struct rte_kni_mbuf, next));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
+			 offsetof(struct rte_kni_mbuf, data_off));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+			 offsetof(struct rte_kni_mbuf, data_len));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+			 offsetof(struct rte_kni_mbuf, pkt_len));
+	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+			 offsetof(struct rte_kni_mbuf, ol_flags));
+
+	/* Check if pktmbuf pool has been configured */
+	if (kni->pktmbuf_pool == NULL) {
+		RTE_LOG(ERR, KNI, "No valid mempool for allocating mbufs\n");
+		return;
+	}
+
+	for (i = 0; i < MAX_MBUF_BURST_NUM; i++) {
+		pkts[i] = rte_pktmbuf_alloc(kni->pktmbuf_pool);
+		if (unlikely(pkts[i] == NULL)) {
+			/* Out of memory */
+			RTE_LOG(ERR, KNI, "Out of memory\n");
+			break;
+		}
+	}
+
+	/* No pkt mbuf alocated */
+	if (i <= 0)
+		return;
+
+	ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i);
+
+	/* Check if any mbufs not put into alloc_q, and then free them */
+	if (ret >= 0 && ret < i && ret < MAX_MBUF_BURST_NUM) {
+		int j;
+
+		for (j = ret; j < i; j++)
+			rte_pktmbuf_free(pkts[j]);
+	}
+}
+
+struct rte_kni *
+rte_kni_get(const char *name)
+{
+	uint32_t i;
+	struct rte_kni_memzone_slot *it;
+	struct rte_kni *kni;
+
+	/* Note: could be improved perf-wise if necessary */
+	for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
+		it = &kni_memzone_pool.slots[i];
+		if (it->in_use == 0)
+			continue;
+		kni = it->m_ctx->addr;
+		if (strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0)
+			return kni;
+	}
+
+	return NULL;
+}
+
+const char *
+rte_kni_get_name(const struct rte_kni *kni)
+{
+	return kni->name;
+}
+
+static enum kni_ops_status
+kni_check_request_register(struct rte_kni_ops *ops)
+{
+	/* check if KNI request ops has been registered*/
+	if( NULL == ops )
+		return KNI_REQ_NO_REGISTER;
+
+	if((NULL == ops->change_mtu) && (NULL == ops->config_network_if))
+		return KNI_REQ_NO_REGISTER;
+
+	return KNI_REQ_REGISTERED;
+}
+
+int
+rte_kni_register_handlers(struct rte_kni *kni,struct rte_kni_ops *ops)
+{
+	enum kni_ops_status req_status;
+
+	if (NULL == ops) {
+		RTE_LOG(ERR, KNI, "Invalid KNI request operation.\n");
+		return -1;
+	}
+
+	if (NULL == kni) {
+		RTE_LOG(ERR, KNI, "Invalid kni info.\n");
+		return -1;
+	}
+
+	req_status = kni_check_request_register(&kni->ops);
+	if ( KNI_REQ_REGISTERED == req_status) {
+		RTE_LOG(ERR, KNI, "The KNI request operation has already registered.\n");
+		return -1;
+	}
+
+	memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops));
+	return 0;
+}
+
+int
+rte_kni_unregister_handlers(struct rte_kni *kni)
+{
+	if (NULL == kni) {
+		RTE_LOG(ERR, KNI, "Invalid kni info.\n");
+		return -1;
+	}
+
+	kni->ops.change_mtu = NULL;
+	kni->ops.config_network_if = NULL;
+	return 0;
+}
+void
+rte_kni_close(void)
+{
+	if (kni_fd < 0)
+		return;
+
+	close(kni_fd);
+	kni_fd = -1;
+}
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
new file mode 100644
index 00000000..9899a170
--- /dev/null
+++ b/lib/librte_kni/rte_kni.h
@@ -0,0 +1,256 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_KNI_H_
+#define _RTE_KNI_H_
+
+/**
+ * @file
+ * RTE KNI
+ *
+ * The KNI library provides the ability to create and destroy kernel NIC
+ * interfaces that may be used by the RTE application to receive/transmit
+ * packets from/to Linux kernel net interfaces.
+ *
+ * This library provide two APIs to burst receive packets from KNI interfaces,
+ * and burst transmit packets to KNI interfaces.
+ */
+
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+
+#include <exec-env/rte_kni_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_kni;
+struct rte_mbuf;
+
+/**
+ * Structure which has the function pointers for KNI interface.
+ */
+struct rte_kni_ops {
+	uint8_t port_id; /* Port ID */
+
+	/* Pointer to function of changing MTU */
+	int (*change_mtu)(uint8_t port_id, unsigned new_mtu);
+
+	/* Pointer to function of configuring network interface */
+	int (*config_network_if)(uint8_t port_id, uint8_t if_up);
+};
+
+/**
+ * Structure for configuring KNI device.
+ */
+struct rte_kni_conf {
+	/*
+	 * KNI name which will be used in relevant network device.
+	 * Let the name as short as possible, as it will be part of
+	 * memzone name.
+	 */
+	char name[RTE_KNI_NAMESIZE];
+	uint32_t core_id;   /* Core ID to bind kernel thread on */
+	uint16_t group_id;  /* Group ID */
+	unsigned mbuf_size; /* mbuf size */
+	struct rte_pci_addr addr;
+	struct rte_pci_id id;
+
+	uint8_t force_bind : 1; /* Flag to bind kernel thread */
+};
+
+/**
+ * Initialize and preallocate KNI subsystem
+ *
+ * This function is to be executed on the MASTER lcore only, after EAL
+ * initialization and before any KNI interface is attempted to be
+ * allocated
+ *
+ * @param max_kni_ifaces
+ *  The maximum number of KNI interfaces that can coexist concurrently
+ */
+void rte_kni_init(unsigned int max_kni_ifaces);
+
+
+/**
+ * Allocate KNI interface according to the port id, mbuf size, mbuf pool,
+ * configurations and callbacks for kernel requests.The KNI interface created
+ * in the kernel space is the net interface the traditional Linux application
+ * talking to.
+ *
+ * The rte_kni_alloc shall not be called before rte_kni_init() has been
+ * called. rte_kni_alloc is thread safe.
+ *
+ * @param pktmbuf_pool
+ *  The mempool for allocting mbufs for packets.
+ * @param conf
+ *  The pointer to the configurations of the KNI device.
+ * @param ops
+ *  The pointer to the callbacks for the KNI kernel requests.
+ *
+ * @return
+ *  - The pointer to the context of a KNI interface.
+ *  - NULL indicate error.
+ */
+struct rte_kni *rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
+		const struct rte_kni_conf *conf, struct rte_kni_ops *ops);
+
+/**
+ * Release KNI interface according to the context. It will also release the
+ * paired KNI interface in kernel space. All processing on the specific KNI
+ * context need to be stopped before calling this interface.
+ *
+ * rte_kni_release is thread safe.
+ *
+ * @param kni
+ *  The pointer to the context of an existent KNI interface.
+ *
+ * @return
+ *  - 0 indicates success.
+ *  - negative value indicates failure.
+ */
+int rte_kni_release(struct rte_kni *kni);
+
+/**
+ * It is used to handle the request mbufs sent from kernel space.
+ * Then analyzes it and calls the specific actions for the specific requests.
+ * Finally constructs the response mbuf and puts it back to the resp_q.
+ *
+ * @param kni
+ *  The pointer to the context of an existent KNI interface.
+ *
+ * @return
+ *  - 0
+ *  - negative value indicates failure.
+ */
+int rte_kni_handle_request(struct rte_kni *kni);
+
+/**
+ * Retrieve a burst of packets from a KNI interface. The retrieved packets are
+ * stored in rte_mbuf structures whose pointers are supplied in the array of
+ * mbufs, and the maximum number is indicated by num. It handles the freeing of
+ * the mbufs in the free queue of KNI interface.
+ *
+ * @param kni
+ *  The KNI interface context.
+ * @param mbufs
+ *  The array to store the pointers of mbufs.
+ * @param num
+ *  The maximum number per burst.
+ *
+ * @return
+ *  The actual number of packets retrieved.
+ */
+unsigned rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs,
+		unsigned num);
+
+/**
+ * Send a burst of packets to a KNI interface. The packets to be sent out are
+ * stored in rte_mbuf structures whose pointers are supplied in the array of
+ * mbufs, and the maximum number is indicated by num. It handles allocating
+ * the mbufs for KNI interface alloc queue.
+ *
+ * @param kni
+ *  The KNI interface context.
+ * @param mbufs
+ *  The array to store the pointers of mbufs.
+ * @param num
+ *  The maximum number per burst.
+ *
+ * @return
+ *  The actual number of packets sent.
+ */
+unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs,
+		unsigned num);
+
+/**
+ * Get the KNI context of its name.
+ *
+ * @param name
+ *  pointer to the KNI device name.
+ *
+ * @return
+ *  On success: Pointer to KNI interface.
+ *  On failure: NULL.
+ */
+struct rte_kni *rte_kni_get(const char *name);
+
+/**
+ * Get the name given to a KNI device
+ *
+ * @param kni
+ *   The KNI instance to query
+ * @return
+ *   The pointer to the KNI name
+ */
+const char *rte_kni_get_name(const struct rte_kni *kni);
+
+/**
+ * Register KNI request handling for a specified port,and it can
+ * be called by master process or slave process.
+ *
+ * @param kni
+ *  pointer to struct rte_kni.
+ * @param ops
+ *  ponter to struct rte_kni_ops.
+ *
+ * @return
+ *  On success: 0
+ *  On failure: -1
+ */
+int rte_kni_register_handlers(struct rte_kni *kni, struct rte_kni_ops *ops);
+
+/**
+ *  Unregister KNI request handling for a specified port.
+ *
+ *  @param kni
+ *   pointer to struct rte_kni.
+ *
+ *  @return
+ *   On success: 0
+ *   On failure: -1
+ */
+int rte_kni_unregister_handlers(struct rte_kni *kni);
+
+/**
+ *  Close KNI device.
+ */
+void rte_kni_close(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_KNI_H_ */
diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h
new file mode 100644
index 00000000..8cb85873
--- /dev/null
+++ b/lib/librte_kni/rte_kni_fifo.h
@@ -0,0 +1,93 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+
+/**
+ * Initializes the kni fifo structure
+ */
+static void
+kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
+{
+	/* Ensure size is power of 2 */
+	if (size & (size - 1))
+		rte_panic("KNI fifo size must be power of 2\n");
+
+	fifo->write = 0;
+	fifo->read = 0;
+	fifo->len = size;
+	fifo->elem_size = sizeof(void *);
+}
+
+/**
+ * Adds num elements into the fifo. Return the number actually written
+ */
+static inline unsigned
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
+{
+	unsigned i = 0;
+	unsigned fifo_write = fifo->write;
+	unsigned fifo_read = fifo->read;
+	unsigned new_write = fifo_write;
+
+	for (i = 0; i < num; i++) {
+		new_write = (new_write + 1) & (fifo->len - 1);
+
+		if (new_write == fifo_read)
+			break;
+		fifo->buffer[fifo_write] = data[i];
+		fifo_write = new_write;
+	}
+	fifo->write = fifo_write;
+	return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actully read
+ */
+static inline unsigned
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
+{
+	unsigned i = 0;
+	unsigned new_read = fifo->read;
+	unsigned fifo_write = fifo->write;
+	for (i = 0; i < num; i++) {
+		if (new_read == fifo_write)
+			break;
+
+		data[i] = fifo->buffer[new_read];
+		new_read = (new_read + 1) & (fifo->len - 1);
+	}
+	fifo->read = new_read;
+	return i;
+}
diff --git a/lib/librte_kni/rte_kni_version.map b/lib/librte_kni/rte_kni_version.map
new file mode 100644
index 00000000..acd515eb
--- /dev/null
+++ b/lib/librte_kni/rte_kni_version.map
@@ -0,0 +1,17 @@
+DPDK_2.0 {
+	global:
+
+	rte_kni_alloc;
+	rte_kni_close;
+	rte_kni_get;
+	rte_kni_get_name;
+	rte_kni_handle_request;
+	rte_kni_init;
+	rte_kni_register_handlers;
+	rte_kni_release;
+	rte_kni_rx_burst;
+	rte_kni_tx_burst;
+	rte_kni_unregister_handlers;
+
+	local: *;
+};
diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile
new file mode 100644
index 00000000..87b09f20
--- /dev/null
+++ b/lib/librte_kvargs/Makefile
@@ -0,0 +1,55 @@
+# BSD LICENSE
+#
+# Copyright 2014 6WIND S.A.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+#
+# - Neither the name of 6WIND S.A. nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+# OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_kvargs.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_kvargs_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_KVARGS) := rte_kvargs.c
+
+# install includes
+INCS := rte_kvargs.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_KVARGS)-include := $(INCS)
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c
new file mode 100644
index 00000000..8d56abd4
--- /dev/null
+++ b/lib/librte_kvargs/rte_kvargs.c
@@ -0,0 +1,210 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdlib.h>
+
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "rte_kvargs.h"
+
+/*
+ * Receive a string with a list of arguments following the pattern
+ * key=value;key=value;... and insert them into the list.
+ * strtok() is used so the params string will be copied to be modified.
+ */
+static int
+rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params)
+{
+	unsigned i;
+	char *str;
+	char *ctx1 = NULL;
+	char *ctx2 = NULL;
+
+	/* Copy the const char *params to a modifiable string
+	 * to pass to rte_strsplit
+	 */
+	kvlist->str = strdup(params);
+	if (kvlist->str == NULL) {
+		RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n");
+		return -1;
+	}
+
+	/* browse each key/value pair and add it in kvlist */
+	str = kvlist->str;
+	while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) {
+
+		i = kvlist->count;
+		if (i >= RTE_KVARGS_MAX) {
+			RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n");
+			return -1;
+		}
+
+		kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2);
+		kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2);
+		if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) {
+			RTE_LOG(ERR, PMD,
+				"Cannot parse arguments: wrong key or value\n"
+				"params=<%s>\n", params);
+			return -1;
+		}
+
+		kvlist->count++;
+		str = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Determine whether a key is valid or not by looking
+ * into a list of valid keys.
+ */
+static int
+is_valid_key(const char *valid[], const char *key_match)
+{
+	const char **valid_ptr;
+
+	for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) {
+		if (strcmp(key_match, *valid_ptr) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Determine whether all keys are valid or not by looking
+ * into a list of valid keys.
+ */
+static int
+check_for_valid_keys(struct rte_kvargs *kvlist,
+		const char *valid[])
+{
+	unsigned i, ret;
+	struct rte_kvargs_pair *pair;
+
+	for (i = 0; i < kvlist->count; i++) {
+		pair = &kvlist->pairs[i];
+		ret = is_valid_key(valid, pair->key);
+		if (!ret) {
+			RTE_LOG(ERR, PMD,
+				"Error parsing device, invalid key <%s>\n",
+				pair->key);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Return the number of times a given arg_name exists in the key/value list.
+ * E.g. given a list = { rx = 0, rx = 1, tx = 2 } the number of args for
+ * arg "rx" will be 2.
+ */
+unsigned
+rte_kvargs_count(const struct rte_kvargs *kvlist, const char *key_match)
+{
+	const struct rte_kvargs_pair *pair;
+	unsigned i, ret;
+
+	ret = 0;
+	for (i = 0; i < kvlist->count; i++) {
+		pair = &kvlist->pairs[i];
+		if (key_match == NULL || strcmp(pair->key, key_match) == 0)
+			ret++;
+	}
+
+	return ret;
+}
+
+/*
+ * For each matching key, call the given handler function.
+ */
+int
+rte_kvargs_process(const struct rte_kvargs *kvlist,
+		const char *key_match,
+		arg_handler_t handler,
+		void *opaque_arg)
+{
+	const struct rte_kvargs_pair *pair;
+	unsigned i;
+
+	for (i = 0; i < kvlist->count; i++) {
+		pair = &kvlist->pairs[i];
+		if (key_match == NULL || strcmp(pair->key, key_match) == 0) {
+			if ((*handler)(pair->key, pair->value, opaque_arg) < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/* free the rte_kvargs structure */
+void
+rte_kvargs_free(struct rte_kvargs *kvlist)
+{
+	if (!kvlist)
+		return;
+
+	free(kvlist->str);
+	free(kvlist);
+}
+
+/*
+ * Parse the arguments "key=value;key=value;..." string and return
+ * an allocated structure that contains a key/value list. Also
+ * check if only valid keys were used.
+ */
+struct rte_kvargs *
+rte_kvargs_parse(const char *args, const char *valid_keys[])
+{
+	struct rte_kvargs *kvlist;
+
+	kvlist = malloc(sizeof(*kvlist));
+	if (kvlist == NULL)
+		return NULL;
+	memset(kvlist, 0, sizeof(*kvlist));
+
+	if (rte_kvargs_tokenize(kvlist, args) < 0) {
+		rte_kvargs_free(kvlist);
+		return NULL;
+	}
+
+	if (valid_keys != NULL && check_for_valid_keys(kvlist, valid_keys) < 0) {
+		rte_kvargs_free(kvlist);
+		return NULL;
+	}
+
+	return kvlist;
+}
diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h
new file mode 100644
index 00000000..ae9ae79f
--- /dev/null
+++ b/lib/librte_kvargs/rte_kvargs.h
@@ -0,0 +1,156 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_KVARGS_H_
+#define _RTE_KVARGS_H_
+
+/**
+ * @file
+ * RTE Argument parsing
+ *
+ * This module can be used to parse arguments whose format is
+ * key1=value1,key2=value2,key3=value3,...
+ *
+ * The same key can appear several times with the same or a different
+ * value. Indeed, the arguments are stored as a list of key/values
+ * associations and not as a dictionary.
+ *
+ * This file provides some helpers that are especially used by virtual
+ * ethernet devices at initialization for arguments parsing.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Maximum number of key/value associations */
+#define RTE_KVARGS_MAX 32
+
+/** separator character used between each pair */
+#define RTE_KVARGS_PAIRS_DELIM	","
+
+/** separator character used between key and value */
+#define RTE_KVARGS_KV_DELIM	"="
+
+/** Type of callback function used by rte_kvargs_process() */
+typedef int (*arg_handler_t)(const char *key, const char *value, void *opaque);
+
+/** A key/value association */
+struct rte_kvargs_pair {
+	char *key;      /**< the name (key) of the association  */
+	char *value;    /**< the value associated to that key */
+};
+
+/** Store a list of key/value associations */
+struct rte_kvargs {
+	char *str;      /**< copy of the argument string */
+	unsigned count; /**< number of entries in the list */
+	struct rte_kvargs_pair pairs[RTE_KVARGS_MAX]; /**< list of key/values */
+};
+
+/**
+ * Allocate a rte_kvargs and store key/value associations from a string
+ *
+ * The function allocates and fills a rte_kvargs structure from a given
+ * string whose format is key1=value1,key2=value2,...
+ *
+ * The structure can be freed with rte_kvargs_free().
+ *
+ * @param args
+ *   The input string containing the key/value associations
+ * @param valid_keys
+ *   A list of valid keys (table of const char *, the last must be NULL).
+ *   This argument is ignored if NULL
+ *
+ * @return
+ *   - A pointer to an allocated rte_kvargs structure on success
+ *   - NULL on error
+ */
+struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]);
+
+/**
+ * Free a rte_kvargs structure
+ *
+ * Free a rte_kvargs structure previously allocated with
+ * rte_kvargs_parse().
+ *
+ * @param kvlist
+ *   The rte_kvargs structure
+ */
+void rte_kvargs_free(struct rte_kvargs *kvlist);
+
+/**
+ * Call a handler function for each key/value matching the key
+ *
+ * For each key/value association that matches the given key, calls the
+ * handler function with the for a given arg_name passing the value on the
+ * dictionary for that key and a given extra argument. If *kvlist* is NULL
+ * function does nothing.
+ *
+ * @param kvlist
+ *   The rte_kvargs structure
+ * @param key_match
+ *   The key on which the handler should be called, or NULL to process handler
+ *   on all associations
+ * @param handler
+ *   The function to call for each matching key
+ * @param opaque_arg
+ *   A pointer passed unchanged to the handler
+ *
+ * @return
+ *   - 0 on success
+ *   - Negative on error
+ */
+int rte_kvargs_process(const struct rte_kvargs *kvlist,
+	const char *key_match, arg_handler_t handler, void *opaque_arg);
+
+/**
+ * Count the number of associations matching the given key
+ *
+ * @param kvlist
+ *   The rte_kvargs structure
+ * @param key_match
+ *   The key that should match, or NULL to count all associations
+
+ * @return
+ *   The number of entries
+ */
+unsigned rte_kvargs_count(const struct rte_kvargs *kvlist,
+	const char *key_match);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_kvargs/rte_kvargs_version.map b/lib/librte_kvargs/rte_kvargs_version.map
new file mode 100644
index 00000000..2030ec46
--- /dev/null
+++ b/lib/librte_kvargs/rte_kvargs_version.map
@@ -0,0 +1,10 @@
+DPDK_2.0 {
+	global:
+
+	rte_kvargs_count;
+	rte_kvargs_free;
+	rte_kvargs_parse;
+	rte_kvargs_process;
+
+	local: *;
+};
diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile
new file mode 100644
index 00000000..656ade27
--- /dev/null
+++ b/lib/librte_lpm/Makefile
@@ -0,0 +1,59 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_lpm.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_lpm_version.map
+
+LIBABIVER := 2
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_LPM) := rte_lpm.c rte_lpm6.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include := rte_lpm.h rte_lpm6.h
+
+ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
+SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_neon.h
+else ifeq ($(CONFIG_RTE_ARCH_X86),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_sse.h
+endif
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_LPM) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
new file mode 100644
index 00000000..8bdf6065
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm.c
@@ -0,0 +1,1919 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_memory.h>        /* for definition of RTE_CACHE_LINE_SIZE */
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
+
+#include "rte_lpm.h"
+
+TAILQ_HEAD(rte_lpm_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_lpm_tailq = {
+	.name = "RTE_LPM",
+};
+EAL_REGISTER_TAILQ(rte_lpm_tailq)
+
+#define MAX_DEPTH_TBL24 24
+
+enum valid_flag {
+	INVALID = 0,
+	VALID
+};
+
+/* Macro to enable/disable run-time checks. */
+#if defined(RTE_LIBRTE_LPM_DEBUG)
+#include <rte_debug.h>
+#define VERIFY_DEPTH(depth) do {                                \
+	if ((depth == 0) || (depth > RTE_LPM_MAX_DEPTH))        \
+		rte_panic("LPM: Invalid depth (%u) at line %d", \
+				(unsigned)(depth), __LINE__);   \
+} while (0)
+#else
+#define VERIFY_DEPTH(depth)
+#endif
+
+/*
+ * Converts a given depth value to its corresponding mask value.
+ *
+ * depth  (IN)		: range = 1 - 32
+ * mask   (OUT)		: 32bit mask
+ */
+static uint32_t __attribute__((pure))
+depth_to_mask(uint8_t depth)
+{
+	VERIFY_DEPTH(depth);
+
+	/* To calculate a mask start with a 1 on the left hand side and right
+	 * shift while populating the left hand side with 1's
+	 */
+	return (int)0x80000000 >> (depth - 1);
+}
+
+/*
+ * Converts given depth value to its corresponding range value.
+ */
+static inline uint32_t __attribute__((pure))
+depth_to_range(uint8_t depth)
+{
+	VERIFY_DEPTH(depth);
+
+	/*
+	 * Calculate tbl24 range. (Note: 2^depth = 1 << depth)
+	 */
+	if (depth <= MAX_DEPTH_TBL24)
+		return 1 << (MAX_DEPTH_TBL24 - depth);
+
+	/* Else if depth is greater than 24 */
+	return 1 << (RTE_LPM_MAX_DEPTH - depth);
+}
+
+/*
+ * Find an existing lpm table and return a pointer to it.
+ */
+struct rte_lpm_v20 *
+rte_lpm_find_existing_v20(const char *name)
+{
+	struct rte_lpm_v20 *l = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_lpm_list *lpm_list;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, lpm_list, next) {
+		l = (struct rte_lpm_v20 *) te->data;
+		if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return l;
+}
+VERSION_SYMBOL(rte_lpm_find_existing, _v20, 2.0);
+
+struct rte_lpm *
+rte_lpm_find_existing_v1604(const char *name)
+{
+	struct rte_lpm *l = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_lpm_list *lpm_list;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, lpm_list, next) {
+		l = (struct rte_lpm *) te->data;
+		if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return l;
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_find_existing, _v1604, 16.04);
+MAP_STATIC_SYMBOL(struct rte_lpm *rte_lpm_find_existing(const char *name),
+		rte_lpm_find_existing_v1604);
+
+/*
+ * Allocates memory for LPM object
+ */
+struct rte_lpm_v20 *
+rte_lpm_create_v20(const char *name, int socket_id, int max_rules,
+		__rte_unused int flags)
+{
+	char mem_name[RTE_LPM_NAMESIZE];
+	struct rte_lpm_v20 *lpm = NULL;
+	struct rte_tailq_entry *te;
+	uint32_t mem_size;
+	struct rte_lpm_list *lpm_list;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl_entry_v20) != 2);
+
+	/* Check user arguments. */
+	if ((name == NULL) || (socket_id < -1) || (max_rules == 0)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
+
+	/* Determine the amount of memory to allocate. */
+	mem_size = sizeof(*lpm) + (sizeof(lpm->rules_tbl[0]) * max_rules);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, lpm_list, next) {
+		lpm = (struct rte_lpm_v20 *) te->data;
+		if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0)
+			break;
+	}
+	lpm = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n");
+		goto exit;
+	}
+
+	/* Allocate memory to store the LPM data structures. */
+	lpm = (struct rte_lpm_v20 *)rte_zmalloc_socket(mem_name, mem_size,
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (lpm == NULL) {
+		RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
+		rte_free(te);
+		goto exit;
+	}
+
+	/* Save user arguments. */
+	lpm->max_rules = max_rules;
+	snprintf(lpm->name, sizeof(lpm->name), "%s", name);
+
+	te->data = (void *) lpm;
+
+	TAILQ_INSERT_TAIL(lpm_list, te, next);
+
+exit:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return lpm;
+}
+VERSION_SYMBOL(rte_lpm_create, _v20, 2.0);
+
+struct rte_lpm *
+rte_lpm_create_v1604(const char *name, int socket_id,
+		const struct rte_lpm_config *config)
+{
+	char mem_name[RTE_LPM_NAMESIZE];
+	struct rte_lpm *lpm = NULL;
+	struct rte_tailq_entry *te;
+	uint32_t mem_size, rules_size, tbl8s_size;
+	struct rte_lpm_list *lpm_list;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl_entry) != 4);
+
+	/* Check user arguments. */
+	if ((name == NULL) || (socket_id < -1) || (config->max_rules == 0)
+			|| config->number_tbl8s > RTE_LPM_MAX_TBL8_NUM_GROUPS) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
+
+	/* Determine the amount of memory to allocate. */
+	mem_size = sizeof(*lpm);
+	rules_size = sizeof(struct rte_lpm_rule) * config->max_rules;
+	tbl8s_size = (sizeof(struct rte_lpm_tbl_entry) *
+			RTE_LPM_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, lpm_list, next) {
+		lpm = (struct rte_lpm *) te->data;
+		if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0)
+			break;
+	}
+	lpm = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n");
+		goto exit;
+	}
+
+	/* Allocate memory to store the LPM data structures. */
+	lpm = (struct rte_lpm *)rte_zmalloc_socket(mem_name, mem_size,
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (lpm == NULL) {
+		RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
+		rte_free(te);
+		goto exit;
+	}
+
+	lpm->rules_tbl = (struct rte_lpm_rule *)rte_zmalloc_socket(NULL,
+			(size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
+
+	if (lpm->rules_tbl == NULL) {
+		RTE_LOG(ERR, LPM, "LPM rules_tbl memory allocation failed\n");
+		rte_free(lpm);
+		lpm = NULL;
+		rte_free(te);
+		goto exit;
+	}
+
+	lpm->tbl8 = (struct rte_lpm_tbl_entry *)rte_zmalloc_socket(NULL,
+			(size_t)tbl8s_size, RTE_CACHE_LINE_SIZE, socket_id);
+
+	if (lpm->tbl8 == NULL) {
+		RTE_LOG(ERR, LPM, "LPM tbl8 memory allocation failed\n");
+		rte_free(lpm);
+		lpm = NULL;
+		rte_free(te);
+		goto exit;
+	}
+
+	/* Save user arguments. */
+	lpm->max_rules = config->max_rules;
+	lpm->number_tbl8s = config->number_tbl8s;
+	snprintf(lpm->name, sizeof(lpm->name), "%s", name);
+
+	te->data = (void *) lpm;
+
+	TAILQ_INSERT_TAIL(lpm_list, te, next);
+
+exit:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return lpm;
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_create, _v1604, 16.04);
+MAP_STATIC_SYMBOL(
+	struct rte_lpm *rte_lpm_create(const char *name, int socket_id,
+			const struct rte_lpm_config *config), rte_lpm_create_v1604);
+
+/*
+ * Deallocates memory for given LPM table.
+ */
+void
+rte_lpm_free_v20(struct rte_lpm_v20 *lpm)
+{
+	struct rte_lpm_list *lpm_list;
+	struct rte_tailq_entry *te;
+
+	/* Check user arguments. */
+	if (lpm == NULL)
+		return;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find our tailq entry */
+	TAILQ_FOREACH(te, lpm_list, next) {
+		if (te->data == (void *) lpm)
+			break;
+	}
+	if (te != NULL)
+		TAILQ_REMOVE(lpm_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_free(lpm->rules_tbl);
+	rte_free(lpm);
+	rte_free(te);
+}
+VERSION_SYMBOL(rte_lpm_free, _v20, 2.0);
+
+void
+rte_lpm_free_v1604(struct rte_lpm *lpm)
+{
+	struct rte_lpm_list *lpm_list;
+	struct rte_tailq_entry *te;
+
+	/* Check user arguments. */
+	if (lpm == NULL)
+		return;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find our tailq entry */
+	TAILQ_FOREACH(te, lpm_list, next) {
+		if (te->data == (void *) lpm)
+			break;
+	}
+	if (te != NULL)
+		TAILQ_REMOVE(lpm_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_free(lpm->rules_tbl);
+	rte_free(lpm);
+	rte_free(te);
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_free, _v1604, 16.04);
+MAP_STATIC_SYMBOL(void rte_lpm_free(struct rte_lpm *lpm),
+		rte_lpm_free_v1604);
+
+/*
+ * Adds a rule to the rule table.
+ *
+ * NOTE: The rule table is split into 32 groups. Each group contains rules that
+ * apply to a specific prefix depth (i.e. group 1 contains rules that apply to
+ * prefixes with a depth of 1 etc.). In the following code (depth - 1) is used
+ * to refer to depth 1 because even though the depth range is 1 - 32, depths
+ * are stored in the rule table from 0 - 31.
+ * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
+ */
+static inline int32_t
+rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
+	uint8_t next_hop)
+{
+	uint32_t rule_gindex, rule_index, last_rule;
+	int i;
+
+	VERIFY_DEPTH(depth);
+
+	/* Scan through rule group to see if rule already exists. */
+	if (lpm->rule_info[depth - 1].used_rules > 0) {
+
+		/* rule_gindex stands for rule group index. */
+		rule_gindex = lpm->rule_info[depth - 1].first_rule;
+		/* Initialise rule_index to point to start of rule group. */
+		rule_index = rule_gindex;
+		/* Last rule = Last used rule in this rule group. */
+		last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules;
+
+		for (; rule_index < last_rule; rule_index++) {
+
+			/* If rule already exists update its next_hop and return. */
+			if (lpm->rules_tbl[rule_index].ip == ip_masked) {
+				lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+				return rule_index;
+			}
+		}
+
+		if (rule_index == lpm->max_rules)
+			return -ENOSPC;
+	} else {
+		/* Calculate the position in which the rule will be stored. */
+		rule_index = 0;
+
+		for (i = depth - 1; i > 0; i--) {
+			if (lpm->rule_info[i - 1].used_rules > 0) {
+				rule_index = lpm->rule_info[i - 1].first_rule
+						+ lpm->rule_info[i - 1].used_rules;
+				break;
+			}
+		}
+		if (rule_index == lpm->max_rules)
+			return -ENOSPC;
+
+		lpm->rule_info[depth - 1].first_rule = rule_index;
+	}
+
+	/* Make room for the new rule in the array. */
+	for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) {
+		if (lpm->rule_info[i - 1].first_rule
+				+ lpm->rule_info[i - 1].used_rules == lpm->max_rules)
+			return -ENOSPC;
+
+		if (lpm->rule_info[i - 1].used_rules > 0) {
+			lpm->rules_tbl[lpm->rule_info[i - 1].first_rule
+				+ lpm->rule_info[i - 1].used_rules]
+					= lpm->rules_tbl[lpm->rule_info[i - 1].first_rule];
+			lpm->rule_info[i - 1].first_rule++;
+		}
+	}
+
+	/* Add the new rule. */
+	lpm->rules_tbl[rule_index].ip = ip_masked;
+	lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+	/* Increment the used rules counter for this rule group. */
+	lpm->rule_info[depth - 1].used_rules++;
+
+	return rule_index;
+}
+
+static inline int32_t
+rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
+	uint32_t next_hop)
+{
+	uint32_t rule_gindex, rule_index, last_rule;
+	int i;
+
+	VERIFY_DEPTH(depth);
+
+	/* Scan through rule group to see if rule already exists. */
+	if (lpm->rule_info[depth - 1].used_rules > 0) {
+
+		/* rule_gindex stands for rule group index. */
+		rule_gindex = lpm->rule_info[depth - 1].first_rule;
+		/* Initialise rule_index to point to start of rule group. */
+		rule_index = rule_gindex;
+		/* Last rule = Last used rule in this rule group. */
+		last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules;
+
+		for (; rule_index < last_rule; rule_index++) {
+
+			/* If rule already exists update its next_hop and return. */
+			if (lpm->rules_tbl[rule_index].ip == ip_masked) {
+				lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+				return rule_index;
+			}
+		}
+
+		if (rule_index == lpm->max_rules)
+			return -ENOSPC;
+	} else {
+		/* Calculate the position in which the rule will be stored. */
+		rule_index = 0;
+
+		for (i = depth - 1; i > 0; i--) {
+			if (lpm->rule_info[i - 1].used_rules > 0) {
+				rule_index = lpm->rule_info[i - 1].first_rule
+						+ lpm->rule_info[i - 1].used_rules;
+				break;
+			}
+		}
+		if (rule_index == lpm->max_rules)
+			return -ENOSPC;
+
+		lpm->rule_info[depth - 1].first_rule = rule_index;
+	}
+
+	/* Make room for the new rule in the array. */
+	for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) {
+		if (lpm->rule_info[i - 1].first_rule
+				+ lpm->rule_info[i - 1].used_rules == lpm->max_rules)
+			return -ENOSPC;
+
+		if (lpm->rule_info[i - 1].used_rules > 0) {
+			lpm->rules_tbl[lpm->rule_info[i - 1].first_rule
+				+ lpm->rule_info[i - 1].used_rules]
+					= lpm->rules_tbl[lpm->rule_info[i - 1].first_rule];
+			lpm->rule_info[i - 1].first_rule++;
+		}
+	}
+
+	/* Add the new rule. */
+	lpm->rules_tbl[rule_index].ip = ip_masked;
+	lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+	/* Increment the used rules counter for this rule group. */
+	lpm->rule_info[depth - 1].used_rules++;
+
+	return rule_index;
+}
+
+/*
+ * Delete a rule from the rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
+ */
+static inline void
+rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
+{
+	int i;
+
+	VERIFY_DEPTH(depth);
+
+	lpm->rules_tbl[rule_index] =
+			lpm->rules_tbl[lpm->rule_info[depth - 1].first_rule
+				+ lpm->rule_info[depth - 1].used_rules - 1];
+
+	for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) {
+		if (lpm->rule_info[i].used_rules > 0) {
+			lpm->rules_tbl[lpm->rule_info[i].first_rule - 1] =
+				lpm->rules_tbl[lpm->rule_info[i].first_rule
+					+ lpm->rule_info[i].used_rules - 1];
+			lpm->rule_info[i].first_rule--;
+		}
+	}
+
+	lpm->rule_info[depth - 1].used_rules--;
+}
+
+static inline void
+rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
+{
+	int i;
+
+	VERIFY_DEPTH(depth);
+
+	lpm->rules_tbl[rule_index] =
+			lpm->rules_tbl[lpm->rule_info[depth - 1].first_rule
+			+ lpm->rule_info[depth - 1].used_rules - 1];
+
+	for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) {
+		if (lpm->rule_info[i].used_rules > 0) {
+			lpm->rules_tbl[lpm->rule_info[i].first_rule - 1] =
+					lpm->rules_tbl[lpm->rule_info[i].first_rule
+						+ lpm->rule_info[i].used_rules - 1];
+			lpm->rule_info[i].first_rule--;
+		}
+	}
+
+	lpm->rule_info[depth - 1].used_rules--;
+}
+
+/*
+ * Finds a rule in rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
+ */
+static inline int32_t
+rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
+{
+	uint32_t rule_gindex, last_rule, rule_index;
+
+	VERIFY_DEPTH(depth);
+
+	rule_gindex = lpm->rule_info[depth - 1].first_rule;
+	last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules;
+
+	/* Scan used rules at given depth to find rule. */
+	for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) {
+		/* If rule is found return the rule index. */
+		if (lpm->rules_tbl[rule_index].ip == ip_masked)
+			return rule_index;
+	}
+
+	/* If rule is not found return -EINVAL. */
+	return -EINVAL;
+}
+
+static inline int32_t
+rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
+{
+	uint32_t rule_gindex, last_rule, rule_index;
+
+	VERIFY_DEPTH(depth);
+
+	rule_gindex = lpm->rule_info[depth - 1].first_rule;
+	last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules;
+
+	/* Scan used rules at given depth to find rule. */
+	for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) {
+		/* If rule is found return the rule index. */
+		if (lpm->rules_tbl[rule_index].ip == ip_masked)
+			return rule_index;
+	}
+
+	/* If rule is not found return -EINVAL. */
+	return -EINVAL;
+}
+
+/*
+ * Find, clean and allocate a tbl8.
+ */
+static inline int32_t
+tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
+{
+	uint32_t group_idx; /* tbl8 group index. */
+	struct rte_lpm_tbl_entry_v20 *tbl8_entry;
+
+	/* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */
+	for (group_idx = 0; group_idx < RTE_LPM_TBL8_NUM_GROUPS;
+			group_idx++) {
+		tbl8_entry = &tbl8[group_idx * RTE_LPM_TBL8_GROUP_NUM_ENTRIES];
+		/* If a free tbl8 group is found clean it and set as VALID. */
+		if (!tbl8_entry->valid_group) {
+			memset(&tbl8_entry[0], 0,
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES *
+					sizeof(tbl8_entry[0]));
+
+			tbl8_entry->valid_group = VALID;
+
+			/* Return group index for allocated tbl8 group. */
+			return group_idx;
+		}
+	}
+
+	/* If there are no tbl8 groups free then return error. */
+	return -ENOSPC;
+}
+
+static inline int32_t
+tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
+{
+	uint32_t group_idx; /* tbl8 group index. */
+	struct rte_lpm_tbl_entry *tbl8_entry;
+
+	/* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */
+	for (group_idx = 0; group_idx < number_tbl8s; group_idx++) {
+		tbl8_entry = &tbl8[group_idx * RTE_LPM_TBL8_GROUP_NUM_ENTRIES];
+		/* If a free tbl8 group is found clean it and set as VALID. */
+		if (!tbl8_entry->valid_group) {
+			memset(&tbl8_entry[0], 0,
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES *
+					sizeof(tbl8_entry[0]));
+
+			tbl8_entry->valid_group = VALID;
+
+			/* Return group index for allocated tbl8 group. */
+			return group_idx;
+		}
+	}
+
+	/* If there are no tbl8 groups free then return error. */
+	return -ENOSPC;
+}
+
+static inline void
+tbl8_free_v20(struct rte_lpm_tbl_entry_v20 *tbl8, uint32_t tbl8_group_start)
+{
+	/* Set tbl8 group invalid*/
+	tbl8[tbl8_group_start].valid_group = INVALID;
+}
+
+static inline void
+tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
+{
+	/* Set tbl8 group invalid*/
+	tbl8[tbl8_group_start].valid_group = INVALID;
+}
+
+static inline int32_t
+add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
+		uint8_t next_hop)
+{
+	uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j;
+
+	/* Calculate the index into Table24. */
+	tbl24_index = ip >> 8;
+	tbl24_range = depth_to_range(depth);
+
+	for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+		/*
+		 * For invalid OR valid and non-extended tbl 24 entries set
+		 * entry.
+		 */
+		if (!lpm->tbl24[i].valid || (lpm->tbl24[i].valid_group == 0 &&
+				lpm->tbl24[i].depth <= depth)) {
+
+			struct rte_lpm_tbl_entry_v20 new_tbl24_entry = {
+				.valid = VALID,
+				.valid_group = 0,
+				.depth = depth,
+			};
+			new_tbl24_entry.next_hop = next_hop;
+
+			/* Setting tbl24 entry in one go to avoid race
+			 * conditions
+			 */
+			lpm->tbl24[i] = new_tbl24_entry;
+
+			continue;
+		}
+
+		if (lpm->tbl24[i].valid_group == 1) {
+			/* If tbl24 entry is valid and extended calculate the
+			 *  index into tbl8.
+			 */
+			tbl8_index = lpm->tbl24[i].group_idx *
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+			tbl8_group_end = tbl8_index +
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+			for (j = tbl8_index; j < tbl8_group_end; j++) {
+				if (!lpm->tbl8[j].valid ||
+						lpm->tbl8[j].depth <= depth) {
+					struct rte_lpm_tbl_entry_v20
+						new_tbl8_entry = {
+						.valid = VALID,
+						.valid_group = VALID,
+						.depth = depth,
+					};
+					new_tbl8_entry.next_hop = next_hop;
+
+					/*
+					 * Setting tbl8 entry in one go to avoid
+					 * race conditions
+					 */
+					lpm->tbl8[j] = new_tbl8_entry;
+
+					continue;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static inline int32_t
+add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+		uint32_t next_hop)
+{
+#define group_idx next_hop
+	uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j;
+
+	/* Calculate the index into Table24. */
+	tbl24_index = ip >> 8;
+	tbl24_range = depth_to_range(depth);
+
+	for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+		/*
+		 * For invalid OR valid and non-extended tbl 24 entries set
+		 * entry.
+		 */
+		if (!lpm->tbl24[i].valid || (lpm->tbl24[i].valid_group == 0 &&
+				lpm->tbl24[i].depth <= depth)) {
+
+			struct rte_lpm_tbl_entry new_tbl24_entry = {
+				.next_hop = next_hop,
+				.valid = VALID,
+				.valid_group = 0,
+				.depth = depth,
+			};
+
+			/* Setting tbl24 entry in one go to avoid race
+			 * conditions
+			 */
+			lpm->tbl24[i] = new_tbl24_entry;
+
+			continue;
+		}
+
+		if (lpm->tbl24[i].valid_group == 1) {
+			/* If tbl24 entry is valid and extended calculate the
+			 *  index into tbl8.
+			 */
+			tbl8_index = lpm->tbl24[i].group_idx *
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+			tbl8_group_end = tbl8_index +
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+			for (j = tbl8_index; j < tbl8_group_end; j++) {
+				if (!lpm->tbl8[j].valid ||
+						lpm->tbl8[j].depth <= depth) {
+					struct rte_lpm_tbl_entry
+						new_tbl8_entry = {
+						.valid = VALID,
+						.valid_group = VALID,
+						.depth = depth,
+						.next_hop = next_hop,
+					};
+
+					/*
+					 * Setting tbl8 entry in one go to avoid
+					 * race conditions
+					 */
+					lpm->tbl8[j] = new_tbl8_entry;
+
+					continue;
+				}
+			}
+		}
+	}
+#undef group_idx
+	return 0;
+}
+
+static inline int32_t
+add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
+		uint8_t next_hop)
+{
+	uint32_t tbl24_index;
+	int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index,
+		tbl8_range, i;
+
+	tbl24_index = (ip_masked >> 8);
+	tbl8_range = depth_to_range(depth);
+
+	if (!lpm->tbl24[tbl24_index].valid) {
+		/* Search for a free tbl8 group. */
+		tbl8_group_index = tbl8_alloc_v20(lpm->tbl8);
+
+		/* Check tbl8 allocation was successful. */
+		if (tbl8_group_index < 0) {
+			return tbl8_group_index;
+		}
+
+		/* Find index into tbl8 and range. */
+		tbl8_index = (tbl8_group_index *
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES) +
+				(ip_masked & 0xFF);
+
+		/* Set tbl8 entry. */
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+			lpm->tbl8[i].depth = depth;
+			lpm->tbl8[i].next_hop = next_hop;
+			lpm->tbl8[i].valid = VALID;
+		}
+
+		/*
+		 * Update tbl24 entry to point to new tbl8 entry. Note: The
+		 * ext_flag and tbl8_index need to be updated simultaneously,
+		 * so assign whole structure in one go
+		 */
+
+		struct rte_lpm_tbl_entry_v20 new_tbl24_entry = {
+			{ .group_idx = (uint8_t)tbl8_group_index, },
+			.valid = VALID,
+			.valid_group = 1,
+			.depth = 0,
+		};
+
+		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+
+	} /* If valid entry but not extended calculate the index into Table8. */
+	else if (lpm->tbl24[tbl24_index].valid_group == 0) {
+		/* Search for free tbl8 group. */
+		tbl8_group_index = tbl8_alloc_v20(lpm->tbl8);
+
+		if (tbl8_group_index < 0) {
+			return tbl8_group_index;
+		}
+
+		tbl8_group_start = tbl8_group_index *
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl8_group_end = tbl8_group_start +
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+		/* Populate new tbl8 with tbl24 value. */
+		for (i = tbl8_group_start; i < tbl8_group_end; i++) {
+			lpm->tbl8[i].valid = VALID;
+			lpm->tbl8[i].depth = lpm->tbl24[tbl24_index].depth;
+			lpm->tbl8[i].next_hop =
+					lpm->tbl24[tbl24_index].next_hop;
+		}
+
+		tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+
+		/* Insert new rule into the tbl8 entry. */
+		for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
+			if (!lpm->tbl8[i].valid ||
+					lpm->tbl8[i].depth <= depth) {
+				lpm->tbl8[i].valid = VALID;
+				lpm->tbl8[i].depth = depth;
+				lpm->tbl8[i].next_hop = next_hop;
+
+				continue;
+			}
+		}
+
+		/*
+		 * Update tbl24 entry to point to new tbl8 entry. Note: The
+		 * ext_flag and tbl8_index need to be updated simultaneously,
+		 * so assign whole structure in one go.
+		 */
+
+		struct rte_lpm_tbl_entry_v20 new_tbl24_entry = {
+				{ .group_idx = (uint8_t)tbl8_group_index, },
+				.valid = VALID,
+				.valid_group = 1,
+				.depth = 0,
+		};
+
+		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+
+	} else { /*
+		* If it is valid, extended entry calculate the index into tbl8.
+		*/
+		tbl8_group_index = lpm->tbl24[tbl24_index].group_idx;
+		tbl8_group_start = tbl8_group_index *
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+
+			if (!lpm->tbl8[i].valid ||
+					lpm->tbl8[i].depth <= depth) {
+				struct rte_lpm_tbl_entry_v20 new_tbl8_entry = {
+					.valid = VALID,
+					.depth = depth,
+					.valid_group = lpm->tbl8[i].valid_group,
+				};
+				new_tbl8_entry.next_hop = next_hop;
+				/*
+				 * Setting tbl8 entry in one go to avoid race
+				 * condition
+				 */
+				lpm->tbl8[i] = new_tbl8_entry;
+
+				continue;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static inline int32_t
+add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
+		uint32_t next_hop)
+{
+#define group_idx next_hop
+	uint32_t tbl24_index;
+	int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index,
+		tbl8_range, i;
+
+	tbl24_index = (ip_masked >> 8);
+	tbl8_range = depth_to_range(depth);
+
+	if (!lpm->tbl24[tbl24_index].valid) {
+		/* Search for a free tbl8 group. */
+		tbl8_group_index = tbl8_alloc_v1604(lpm->tbl8, lpm->number_tbl8s);
+
+		/* Check tbl8 allocation was successful. */
+		if (tbl8_group_index < 0) {
+			return tbl8_group_index;
+		}
+
+		/* Find index into tbl8 and range. */
+		tbl8_index = (tbl8_group_index *
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES) +
+				(ip_masked & 0xFF);
+
+		/* Set tbl8 entry. */
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+			lpm->tbl8[i].depth = depth;
+			lpm->tbl8[i].next_hop = next_hop;
+			lpm->tbl8[i].valid = VALID;
+		}
+
+		/*
+		 * Update tbl24 entry to point to new tbl8 entry. Note: The
+		 * ext_flag and tbl8_index need to be updated simultaneously,
+		 * so assign whole structure in one go
+		 */
+
+		struct rte_lpm_tbl_entry new_tbl24_entry = {
+			.group_idx = (uint8_t)tbl8_group_index,
+			.valid = VALID,
+			.valid_group = 1,
+			.depth = 0,
+		};
+
+		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+
+	} /* If valid entry but not extended calculate the index into Table8. */
+	else if (lpm->tbl24[tbl24_index].valid_group == 0) {
+		/* Search for free tbl8 group. */
+		tbl8_group_index = tbl8_alloc_v1604(lpm->tbl8, lpm->number_tbl8s);
+
+		if (tbl8_group_index < 0) {
+			return tbl8_group_index;
+		}
+
+		tbl8_group_start = tbl8_group_index *
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl8_group_end = tbl8_group_start +
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+		/* Populate new tbl8 with tbl24 value. */
+		for (i = tbl8_group_start; i < tbl8_group_end; i++) {
+			lpm->tbl8[i].valid = VALID;
+			lpm->tbl8[i].depth = lpm->tbl24[tbl24_index].depth;
+			lpm->tbl8[i].next_hop =
+					lpm->tbl24[tbl24_index].next_hop;
+		}
+
+		tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+
+		/* Insert new rule into the tbl8 entry. */
+		for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
+			if (!lpm->tbl8[i].valid ||
+					lpm->tbl8[i].depth <= depth) {
+				lpm->tbl8[i].valid = VALID;
+				lpm->tbl8[i].depth = depth;
+				lpm->tbl8[i].next_hop = next_hop;
+
+				continue;
+			}
+		}
+
+		/*
+		 * Update tbl24 entry to point to new tbl8 entry. Note: The
+		 * ext_flag and tbl8_index need to be updated simultaneously,
+		 * so assign whole structure in one go.
+		 */
+
+		struct rte_lpm_tbl_entry new_tbl24_entry = {
+				.group_idx = (uint8_t)tbl8_group_index,
+				.valid = VALID,
+				.valid_group = 1,
+				.depth = 0,
+		};
+
+		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+
+	} else { /*
+		* If it is valid, extended entry calculate the index into tbl8.
+		*/
+		tbl8_group_index = lpm->tbl24[tbl24_index].group_idx;
+		tbl8_group_start = tbl8_group_index *
+				RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+
+			if (!lpm->tbl8[i].valid ||
+					lpm->tbl8[i].depth <= depth) {
+				struct rte_lpm_tbl_entry new_tbl8_entry = {
+					.valid = VALID,
+					.depth = depth,
+					.next_hop = next_hop,
+					.valid_group = lpm->tbl8[i].valid_group,
+				};
+
+				/*
+				 * Setting tbl8 entry in one go to avoid race
+				 * condition
+				 */
+				lpm->tbl8[i] = new_tbl8_entry;
+
+				continue;
+			}
+		}
+	}
+#undef group_idx
+	return 0;
+}
+
+/*
+ * Add a route
+ */
+int
+rte_lpm_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
+		uint8_t next_hop)
+{
+	int32_t rule_index, status = 0;
+	uint32_t ip_masked;
+
+	/* Check user arguments. */
+	if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
+		return -EINVAL;
+
+	ip_masked = ip & depth_to_mask(depth);
+
+	/* Add the rule to the rule table. */
+	rule_index = rule_add_v20(lpm, ip_masked, depth, next_hop);
+
+	/* If the is no space available for new rule return error. */
+	if (rule_index < 0) {
+		return rule_index;
+	}
+
+	if (depth <= MAX_DEPTH_TBL24) {
+		status = add_depth_small_v20(lpm, ip_masked, depth, next_hop);
+	} else { /* If depth > RTE_LPM_MAX_DEPTH_TBL24 */
+		status = add_depth_big_v20(lpm, ip_masked, depth, next_hop);
+
+		/*
+		 * If add fails due to exhaustion of tbl8 extensions delete
+		 * rule that was added to rule table.
+		 */
+		if (status < 0) {
+			rule_delete_v20(lpm, rule_index, depth);
+
+			return status;
+		}
+	}
+
+	return 0;
+}
+VERSION_SYMBOL(rte_lpm_add, _v20, 2.0);
+
+int
+rte_lpm_add_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+		uint32_t next_hop)
+{
+	int32_t rule_index, status = 0;
+	uint32_t ip_masked;
+
+	/* Check user arguments. */
+	if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
+		return -EINVAL;
+
+	ip_masked = ip & depth_to_mask(depth);
+
+	/* Add the rule to the rule table. */
+	rule_index = rule_add_v1604(lpm, ip_masked, depth, next_hop);
+
+	/* If the is no space available for new rule return error. */
+	if (rule_index < 0) {
+		return rule_index;
+	}
+
+	if (depth <= MAX_DEPTH_TBL24) {
+		status = add_depth_small_v1604(lpm, ip_masked, depth, next_hop);
+	} else { /* If depth > RTE_LPM_MAX_DEPTH_TBL24 */
+		status = add_depth_big_v1604(lpm, ip_masked, depth, next_hop);
+
+		/*
+		 * If add fails due to exhaustion of tbl8 extensions delete
+		 * rule that was added to rule table.
+		 */
+		if (status < 0) {
+			rule_delete_v1604(lpm, rule_index, depth);
+
+			return status;
+		}
+	}
+
+	return 0;
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_add, _v1604, 16.04);
+MAP_STATIC_SYMBOL(int rte_lpm_add(struct rte_lpm *lpm, uint32_t ip,
+		uint8_t depth, uint32_t next_hop), rte_lpm_add_v1604);
+
+/*
+ * Look for a rule in the high-level rules table
+ */
+int
+rte_lpm_is_rule_present_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
+uint8_t *next_hop)
+{
+	uint32_t ip_masked;
+	int32_t rule_index;
+
+	/* Check user arguments. */
+	if ((lpm == NULL) ||
+		(next_hop == NULL) ||
+		(depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
+		return -EINVAL;
+
+	/* Look for the rule using rule_find. */
+	ip_masked = ip & depth_to_mask(depth);
+	rule_index = rule_find_v20(lpm, ip_masked, depth);
+
+	if (rule_index >= 0) {
+		*next_hop = lpm->rules_tbl[rule_index].next_hop;
+		return 1;
+	}
+
+	/* If rule is not found return 0. */
+	return 0;
+}
+VERSION_SYMBOL(rte_lpm_is_rule_present, _v20, 2.0);
+
+int
+rte_lpm_is_rule_present_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+uint32_t *next_hop)
+{
+	uint32_t ip_masked;
+	int32_t rule_index;
+
+	/* Check user arguments. */
+	if ((lpm == NULL) ||
+		(next_hop == NULL) ||
+		(depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
+		return -EINVAL;
+
+	/* Look for the rule using rule_find. */
+	ip_masked = ip & depth_to_mask(depth);
+	rule_index = rule_find_v1604(lpm, ip_masked, depth);
+
+	if (rule_index >= 0) {
+		*next_hop = lpm->rules_tbl[rule_index].next_hop;
+		return 1;
+	}
+
+	/* If rule is not found return 0. */
+	return 0;
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_is_rule_present, _v1604, 16.04);
+MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
+		uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604);
+
+static inline int32_t
+find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
+		uint8_t *sub_rule_depth)
+{
+	int32_t rule_index;
+	uint32_t ip_masked;
+	uint8_t prev_depth;
+
+	for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) {
+		ip_masked = ip & depth_to_mask(prev_depth);
+
+		rule_index = rule_find_v20(lpm, ip_masked, prev_depth);
+
+		if (rule_index >= 0) {
+			*sub_rule_depth = prev_depth;
+			return rule_index;
+		}
+	}
+
+	return -1;
+}
+
+static inline int32_t
+find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+		uint8_t *sub_rule_depth)
+{
+	int32_t rule_index;
+	uint32_t ip_masked;
+	uint8_t prev_depth;
+
+	for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) {
+		ip_masked = ip & depth_to_mask(prev_depth);
+
+		rule_index = rule_find_v1604(lpm, ip_masked, prev_depth);
+
+		if (rule_index >= 0) {
+			*sub_rule_depth = prev_depth;
+			return rule_index;
+		}
+	}
+
+	return -1;
+}
+
+static inline int32_t
+delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
+	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
+{
+	uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j;
+
+	/* Calculate the range and index into Table24. */
+	tbl24_range = depth_to_range(depth);
+	tbl24_index = (ip_masked >> 8);
+
+	/*
+	 * Firstly check the sub_rule_index. A -1 indicates no replacement rule
+	 * and a positive number indicates a sub_rule_index.
+	 */
+	if (sub_rule_index < 0) {
+		/*
+		 * If no replacement rule exists then invalidate entries
+		 * associated with this rule.
+		 */
+		for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+
+			if (lpm->tbl24[i].valid_group == 0 &&
+					lpm->tbl24[i].depth <= depth) {
+				lpm->tbl24[i].valid = INVALID;
+			} else if (lpm->tbl24[i].valid_group == 1) {
+				/*
+				 * If TBL24 entry is extended, then there has
+				 * to be a rule with depth >= 25 in the
+				 * associated TBL8 group.
+				 */
+
+				tbl8_group_index = lpm->tbl24[i].group_idx;
+				tbl8_index = tbl8_group_index *
+						RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+				for (j = tbl8_index; j < (tbl8_index +
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
+
+					if (lpm->tbl8[j].depth <= depth)
+						lpm->tbl8[j].valid = INVALID;
+				}
+			}
+		}
+	} else {
+		/*
+		 * If a replacement rule exists then modify entries
+		 * associated with this rule.
+		 */
+
+		struct rte_lpm_tbl_entry_v20 new_tbl24_entry = {
+			{.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,},
+			.valid = VALID,
+			.valid_group = 0,
+			.depth = sub_rule_depth,
+		};
+
+		struct rte_lpm_tbl_entry_v20 new_tbl8_entry = {
+			.valid = VALID,
+			.valid_group = VALID,
+			.depth = sub_rule_depth,
+		};
+		new_tbl8_entry.next_hop =
+				lpm->rules_tbl[sub_rule_index].next_hop;
+
+		for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+
+			if (lpm->tbl24[i].valid_group == 0 &&
+					lpm->tbl24[i].depth <= depth) {
+				lpm->tbl24[i] = new_tbl24_entry;
+			} else  if (lpm->tbl24[i].valid_group == 1) {
+				/*
+				 * If TBL24 entry is extended, then there has
+				 * to be a rule with depth >= 25 in the
+				 * associated TBL8 group.
+				 */
+
+				tbl8_group_index = lpm->tbl24[i].group_idx;
+				tbl8_index = tbl8_group_index *
+						RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+				for (j = tbl8_index; j < (tbl8_index +
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
+
+					if (lpm->tbl8[j].depth <= depth)
+						lpm->tbl8[j] = new_tbl8_entry;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static inline int32_t
+delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
+	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
+{
+#define group_idx next_hop
+	uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j;
+
+	/* Calculate the range and index into Table24. */
+	tbl24_range = depth_to_range(depth);
+	tbl24_index = (ip_masked >> 8);
+
+	/*
+	 * Firstly check the sub_rule_index. A -1 indicates no replacement rule
+	 * and a positive number indicates a sub_rule_index.
+	 */
+	if (sub_rule_index < 0) {
+		/*
+		 * If no replacement rule exists then invalidate entries
+		 * associated with this rule.
+		 */
+		for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+
+			if (lpm->tbl24[i].valid_group == 0 &&
+					lpm->tbl24[i].depth <= depth) {
+				lpm->tbl24[i].valid = INVALID;
+			} else if (lpm->tbl24[i].valid_group == 1) {
+				/*
+				 * If TBL24 entry is extended, then there has
+				 * to be a rule with depth >= 25 in the
+				 * associated TBL8 group.
+				 */
+
+				tbl8_group_index = lpm->tbl24[i].group_idx;
+				tbl8_index = tbl8_group_index *
+						RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+				for (j = tbl8_index; j < (tbl8_index +
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
+
+					if (lpm->tbl8[j].depth <= depth)
+						lpm->tbl8[j].valid = INVALID;
+				}
+			}
+		}
+	} else {
+		/*
+		 * If a replacement rule exists then modify entries
+		 * associated with this rule.
+		 */
+
+		struct rte_lpm_tbl_entry new_tbl24_entry = {
+			.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,
+			.valid = VALID,
+			.valid_group = 0,
+			.depth = sub_rule_depth,
+		};
+
+		struct rte_lpm_tbl_entry new_tbl8_entry = {
+			.valid = VALID,
+			.valid_group = VALID,
+			.depth = sub_rule_depth,
+			.next_hop = lpm->rules_tbl
+			[sub_rule_index].next_hop,
+		};
+
+		for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+
+			if (lpm->tbl24[i].valid_group == 0 &&
+					lpm->tbl24[i].depth <= depth) {
+				lpm->tbl24[i] = new_tbl24_entry;
+			} else  if (lpm->tbl24[i].valid_group == 1) {
+				/*
+				 * If TBL24 entry is extended, then there has
+				 * to be a rule with depth >= 25 in the
+				 * associated TBL8 group.
+				 */
+
+				tbl8_group_index = lpm->tbl24[i].group_idx;
+				tbl8_index = tbl8_group_index *
+						RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+				for (j = tbl8_index; j < (tbl8_index +
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
+
+					if (lpm->tbl8[j].depth <= depth)
+						lpm->tbl8[j] = new_tbl8_entry;
+				}
+			}
+		}
+	}
+#undef group_idx
+	return 0;
+}
+
+/*
+ * Checks if table 8 group can be recycled.
+ *
+ * Return of -EEXIST means tbl8 is in use and thus can not be recycled.
+ * Return of -EINVAL means tbl8 is empty and thus can be recycled
+ * Return of value > -1 means tbl8 is in use but has all the same values and
+ * thus can be recycled
+ */
+static inline int32_t
+tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
+		uint32_t tbl8_group_start)
+{
+	uint32_t tbl8_group_end, i;
+	tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+	/*
+	 * Check the first entry of the given tbl8. If it is invalid we know
+	 * this tbl8 does not contain any rule with a depth < RTE_LPM_MAX_DEPTH
+	 *  (As they would affect all entries in a tbl8) and thus this table
+	 *  can not be recycled.
+	 */
+	if (tbl8[tbl8_group_start].valid) {
+		/*
+		 * If first entry is valid check if the depth is less than 24
+		 * and if so check the rest of the entries to verify that they
+		 * are all of this depth.
+		 */
+		if (tbl8[tbl8_group_start].depth < MAX_DEPTH_TBL24) {
+			for (i = (tbl8_group_start + 1); i < tbl8_group_end;
+					i++) {
+
+				if (tbl8[i].depth !=
+						tbl8[tbl8_group_start].depth) {
+
+					return -EEXIST;
+				}
+			}
+			/* If all entries are the same return the tb8 index */
+			return tbl8_group_start;
+		}
+
+		return -EEXIST;
+	}
+	/*
+	 * If the first entry is invalid check if the rest of the entries in
+	 * the tbl8 are invalid.
+	 */
+	for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) {
+		if (tbl8[i].valid)
+			return -EEXIST;
+	}
+	/* If no valid entries are found then return -EINVAL. */
+	return -EINVAL;
+}
+
+static inline int32_t
+tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
+		uint32_t tbl8_group_start)
+{
+	uint32_t tbl8_group_end, i;
+	tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+	/*
+	 * Check the first entry of the given tbl8. If it is invalid we know
+	 * this tbl8 does not contain any rule with a depth < RTE_LPM_MAX_DEPTH
+	 *  (As they would affect all entries in a tbl8) and thus this table
+	 *  can not be recycled.
+	 */
+	if (tbl8[tbl8_group_start].valid) {
+		/*
+		 * If first entry is valid check if the depth is less than 24
+		 * and if so check the rest of the entries to verify that they
+		 * are all of this depth.
+		 */
+		if (tbl8[tbl8_group_start].depth < MAX_DEPTH_TBL24) {
+			for (i = (tbl8_group_start + 1); i < tbl8_group_end;
+					i++) {
+
+				if (tbl8[i].depth !=
+						tbl8[tbl8_group_start].depth) {
+
+					return -EEXIST;
+				}
+			}
+			/* If all entries are the same return the tb8 index */
+			return tbl8_group_start;
+		}
+
+		return -EEXIST;
+	}
+	/*
+	 * If the first entry is invalid check if the rest of the entries in
+	 * the tbl8 are invalid.
+	 */
+	for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) {
+		if (tbl8[i].valid)
+			return -EEXIST;
+	}
+	/* If no valid entries are found then return -EINVAL. */
+	return -EINVAL;
+}
+
+static inline int32_t
+delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
+	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
+{
+	uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index,
+			tbl8_range, i;
+	int32_t tbl8_recycle_index;
+
+	/*
+	 * Calculate the index into tbl24 and range. Note: All depths larger
+	 * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry.
+	 */
+	tbl24_index = ip_masked >> 8;
+
+	/* Calculate the index into tbl8 and range. */
+	tbl8_group_index = lpm->tbl24[tbl24_index].group_idx;
+	tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+	tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+	tbl8_range = depth_to_range(depth);
+
+	if (sub_rule_index < 0) {
+		/*
+		 * Loop through the range of entries on tbl8 for which the
+		 * rule_to_delete must be removed or modified.
+		 */
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+			if (lpm->tbl8[i].depth <= depth)
+				lpm->tbl8[i].valid = INVALID;
+		}
+	} else {
+		/* Set new tbl8 entry. */
+		struct rte_lpm_tbl_entry_v20 new_tbl8_entry = {
+			.valid = VALID,
+			.depth = sub_rule_depth,
+			.valid_group = lpm->tbl8[tbl8_group_start].valid_group,
+		};
+
+		new_tbl8_entry.next_hop =
+				lpm->rules_tbl[sub_rule_index].next_hop;
+		/*
+		 * Loop through the range of entries on tbl8 for which the
+		 * rule_to_delete must be modified.
+		 */
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+			if (lpm->tbl8[i].depth <= depth)
+				lpm->tbl8[i] = new_tbl8_entry;
+		}
+	}
+
+	/*
+	 * Check if there are any valid entries in this tbl8 group. If all
+	 * tbl8 entries are invalid we can free the tbl8 and invalidate the
+	 * associated tbl24 entry.
+	 */
+
+	tbl8_recycle_index = tbl8_recycle_check_v20(lpm->tbl8, tbl8_group_start);
+
+	if (tbl8_recycle_index == -EINVAL) {
+		/* Set tbl24 before freeing tbl8 to avoid race condition. */
+		lpm->tbl24[tbl24_index].valid = 0;
+		tbl8_free_v20(lpm->tbl8, tbl8_group_start);
+	} else if (tbl8_recycle_index > -1) {
+		/* Update tbl24 entry. */
+		struct rte_lpm_tbl_entry_v20 new_tbl24_entry = {
+			{ .next_hop = lpm->tbl8[tbl8_recycle_index].next_hop, },
+			.valid = VALID,
+			.valid_group = 0,
+			.depth = lpm->tbl8[tbl8_recycle_index].depth,
+		};
+
+		/* Set tbl24 before freeing tbl8 to avoid race condition. */
+		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+		tbl8_free_v20(lpm->tbl8, tbl8_group_start);
+	}
+
+	return 0;
+}
+
+static inline int32_t
+delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
+	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
+{
+#define group_idx next_hop
+	uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index,
+			tbl8_range, i;
+	int32_t tbl8_recycle_index;
+
+	/*
+	 * Calculate the index into tbl24 and range. Note: All depths larger
+	 * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry.
+	 */
+	tbl24_index = ip_masked >> 8;
+
+	/* Calculate the index into tbl8 and range. */
+	tbl8_group_index = lpm->tbl24[tbl24_index].group_idx;
+	tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+	tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+	tbl8_range = depth_to_range(depth);
+
+	if (sub_rule_index < 0) {
+		/*
+		 * Loop through the range of entries on tbl8 for which the
+		 * rule_to_delete must be removed or modified.
+		 */
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+			if (lpm->tbl8[i].depth <= depth)
+				lpm->tbl8[i].valid = INVALID;
+		}
+	} else {
+		/* Set new tbl8 entry. */
+		struct rte_lpm_tbl_entry new_tbl8_entry = {
+			.valid = VALID,
+			.depth = sub_rule_depth,
+			.valid_group = lpm->tbl8[tbl8_group_start].valid_group,
+			.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,
+		};
+
+		/*
+		 * Loop through the range of entries on tbl8 for which the
+		 * rule_to_delete must be modified.
+		 */
+		for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+			if (lpm->tbl8[i].depth <= depth)
+				lpm->tbl8[i] = new_tbl8_entry;
+		}
+	}
+
+	/*
+	 * Check if there are any valid entries in this tbl8 group. If all
+	 * tbl8 entries are invalid we can free the tbl8 and invalidate the
+	 * associated tbl24 entry.
+	 */
+
+	tbl8_recycle_index = tbl8_recycle_check_v1604(lpm->tbl8, tbl8_group_start);
+
+	if (tbl8_recycle_index == -EINVAL) {
+		/* Set tbl24 before freeing tbl8 to avoid race condition. */
+		lpm->tbl24[tbl24_index].valid = 0;
+		tbl8_free_v1604(lpm->tbl8, tbl8_group_start);
+	} else if (tbl8_recycle_index > -1) {
+		/* Update tbl24 entry. */
+		struct rte_lpm_tbl_entry new_tbl24_entry = {
+			.next_hop = lpm->tbl8[tbl8_recycle_index].next_hop,
+			.valid = VALID,
+			.valid_group = 0,
+			.depth = lpm->tbl8[tbl8_recycle_index].depth,
+		};
+
+		/* Set tbl24 before freeing tbl8 to avoid race condition. */
+		lpm->tbl24[tbl24_index] = new_tbl24_entry;
+		tbl8_free_v1604(lpm->tbl8, tbl8_group_start);
+	}
+#undef group_idx
+	return 0;
+}
+
+/*
+ * Deletes a rule
+ */
+int
+rte_lpm_delete_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth)
+{
+	int32_t rule_to_delete_index, sub_rule_index;
+	uint32_t ip_masked;
+	uint8_t sub_rule_depth;
+	/*
+	 * Check input arguments. Note: IP must be a positive integer of 32
+	 * bits in length therefore it need not be checked.
+	 */
+	if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) {
+		return -EINVAL;
+	}
+
+	ip_masked = ip & depth_to_mask(depth);
+
+	/*
+	 * Find the index of the input rule, that needs to be deleted, in the
+	 * rule table.
+	 */
+	rule_to_delete_index = rule_find_v20(lpm, ip_masked, depth);
+
+	/*
+	 * Check if rule_to_delete_index was found. If no rule was found the
+	 * function rule_find returns -EINVAL.
+	 */
+	if (rule_to_delete_index < 0)
+		return -EINVAL;
+
+	/* Delete the rule from the rule table. */
+	rule_delete_v20(lpm, rule_to_delete_index, depth);
+
+	/*
+	 * Find rule to replace the rule_to_delete. If there is no rule to
+	 * replace the rule_to_delete we return -1 and invalidate the table
+	 * entries associated with this rule.
+	 */
+	sub_rule_depth = 0;
+	sub_rule_index = find_previous_rule_v20(lpm, ip, depth, &sub_rule_depth);
+
+	/*
+	 * If the input depth value is less than 25 use function
+	 * delete_depth_small otherwise use delete_depth_big.
+	 */
+	if (depth <= MAX_DEPTH_TBL24) {
+		return delete_depth_small_v20(lpm, ip_masked, depth,
+				sub_rule_index, sub_rule_depth);
+	} else { /* If depth > MAX_DEPTH_TBL24 */
+		return delete_depth_big_v20(lpm, ip_masked, depth, sub_rule_index,
+				sub_rule_depth);
+	}
+}
+VERSION_SYMBOL(rte_lpm_delete, _v20, 2.0);
+
+int
+rte_lpm_delete_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth)
+{
+	int32_t rule_to_delete_index, sub_rule_index;
+	uint32_t ip_masked;
+	uint8_t sub_rule_depth;
+	/*
+	 * Check input arguments. Note: IP must be a positive integer of 32
+	 * bits in length therefore it need not be checked.
+	 */
+	if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) {
+		return -EINVAL;
+	}
+
+	ip_masked = ip & depth_to_mask(depth);
+
+	/*
+	 * Find the index of the input rule, that needs to be deleted, in the
+	 * rule table.
+	 */
+	rule_to_delete_index = rule_find_v1604(lpm, ip_masked, depth);
+
+	/*
+	 * Check if rule_to_delete_index was found. If no rule was found the
+	 * function rule_find returns -EINVAL.
+	 */
+	if (rule_to_delete_index < 0)
+		return -EINVAL;
+
+	/* Delete the rule from the rule table. */
+	rule_delete_v1604(lpm, rule_to_delete_index, depth);
+
+	/*
+	 * Find rule to replace the rule_to_delete. If there is no rule to
+	 * replace the rule_to_delete we return -1 and invalidate the table
+	 * entries associated with this rule.
+	 */
+	sub_rule_depth = 0;
+	sub_rule_index = find_previous_rule_v1604(lpm, ip, depth, &sub_rule_depth);
+
+	/*
+	 * If the input depth value is less than 25 use function
+	 * delete_depth_small otherwise use delete_depth_big.
+	 */
+	if (depth <= MAX_DEPTH_TBL24) {
+		return delete_depth_small_v1604(lpm, ip_masked, depth,
+				sub_rule_index, sub_rule_depth);
+	} else { /* If depth > MAX_DEPTH_TBL24 */
+		return delete_depth_big_v1604(lpm, ip_masked, depth, sub_rule_index,
+				sub_rule_depth);
+	}
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_delete, _v1604, 16.04);
+MAP_STATIC_SYMBOL(int rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip,
+		uint8_t depth), rte_lpm_delete_v1604);
+
+/*
+ * Delete all rules from the LPM table.
+ */
+void
+rte_lpm_delete_all_v20(struct rte_lpm_v20 *lpm)
+{
+	/* Zero rule information. */
+	memset(lpm->rule_info, 0, sizeof(lpm->rule_info));
+
+	/* Zero tbl24. */
+	memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+
+	/* Zero tbl8. */
+	memset(lpm->tbl8, 0, sizeof(lpm->tbl8));
+
+	/* Delete all rules form the rules table. */
+	memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules);
+}
+VERSION_SYMBOL(rte_lpm_delete_all, _v20, 2.0);
+
+void
+rte_lpm_delete_all_v1604(struct rte_lpm *lpm)
+{
+	/* Zero rule information. */
+	memset(lpm->rule_info, 0, sizeof(lpm->rule_info));
+
+	/* Zero tbl24. */
+	memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+
+	/* Zero tbl8. */
+	memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
+			* RTE_LPM_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+
+	/* Delete all rules form the rules table. */
+	memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules);
+}
+BIND_DEFAULT_SYMBOL(rte_lpm_delete_all, _v1604, 16.04);
+MAP_STATIC_SYMBOL(void rte_lpm_delete_all(struct rte_lpm *lpm),
+		rte_lpm_delete_all_v1604);
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
new file mode 100644
index 00000000..2df1d672
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm.h
@@ -0,0 +1,491 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LPM_H_
+#define _RTE_LPM_H_
+
+/**
+ * @file
+ * RTE Longest Prefix Match (LPM)
+ */
+
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_memory.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+#include <rte_compat.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Max number of characters in LPM name. */
+#define RTE_LPM_NAMESIZE                32
+
+/** Maximum depth value possible for IPv4 LPM. */
+#define RTE_LPM_MAX_DEPTH               32
+
+/** @internal Total number of tbl24 entries. */
+#define RTE_LPM_TBL24_NUM_ENTRIES       (1 << 24)
+
+/** @internal Number of entries in a tbl8 group. */
+#define RTE_LPM_TBL8_GROUP_NUM_ENTRIES  256
+
+/** @internal Max number of tbl8 groups in the tbl8. */
+#define RTE_LPM_MAX_TBL8_NUM_GROUPS         (1 << 24)
+
+/** @internal Total number of tbl8 groups in the tbl8. */
+#define RTE_LPM_TBL8_NUM_GROUPS         256
+
+/** @internal Total number of tbl8 entries. */
+#define RTE_LPM_TBL8_NUM_ENTRIES        (RTE_LPM_TBL8_NUM_GROUPS * \
+					RTE_LPM_TBL8_GROUP_NUM_ENTRIES)
+
+/** @internal Macro to enable/disable run-time checks. */
+#if defined(RTE_LIBRTE_LPM_DEBUG)
+#define RTE_LPM_RETURN_IF_TRUE(cond, retval) do { \
+	if (cond) return (retval);                \
+} while (0)
+#else
+#define RTE_LPM_RETURN_IF_TRUE(cond, retval)
+#endif
+
+/** @internal bitmask with valid and valid_group fields set */
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+/** Bitmask used to indicate successful lookup */
+#define RTE_LPM_LOOKUP_SUCCESS          0x01000000
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+/** @internal Tbl24 entry structure. */
+struct rte_lpm_tbl_entry_v20 {
+	/**
+	 * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
+	 * a group index pointing to a tbl8 structure (tbl24 only, when
+	 * valid_group is set)
+	 */
+	union {
+		uint8_t next_hop;
+		uint8_t group_idx;
+	};
+	/* Using single uint8_t to store 3 values. */
+	uint8_t valid     :1;   /**< Validation flag. */
+	/**
+	 * For tbl24:
+	 *  - valid_group == 0: entry stores a next hop
+	 *  - valid_group == 1: entry stores a group_index pointing to a tbl8
+	 * For tbl8:
+	 *  - valid_group indicates whether the current tbl8 is in use or not
+	 */
+	uint8_t valid_group :1;
+	uint8_t depth       :6; /**< Rule depth. */
+};
+
+struct rte_lpm_tbl_entry {
+	/**
+	 * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
+	 * a group index pointing to a tbl8 structure (tbl24 only, when
+	 * valid_group is set)
+	 */
+	uint32_t next_hop    :24;
+	/* Using single uint8_t to store 3 values. */
+	uint32_t valid       :1;   /**< Validation flag. */
+	/**
+	 * For tbl24:
+	 *  - valid_group == 0: entry stores a next hop
+	 *  - valid_group == 1: entry stores a group_index pointing to a tbl8
+	 * For tbl8:
+	 *  - valid_group indicates whether the current tbl8 is in use or not
+	 */
+	uint32_t valid_group :1;
+	uint32_t depth       :6; /**< Rule depth. */
+};
+
+#else
+struct rte_lpm_tbl_entry_v20 {
+	uint8_t depth       :6;
+	uint8_t valid_group :1;
+	uint8_t valid       :1;
+	union {
+		uint8_t group_idx;
+		uint8_t next_hop;
+	};
+};
+
+struct rte_lpm_tbl_entry {
+	uint32_t depth       :6;
+	uint32_t valid_group :1;
+	uint32_t valid       :1;
+	uint32_t next_hop    :24;
+
+};
+
+#endif
+
+/** LPM configuration structure. */
+struct rte_lpm_config {
+	uint32_t max_rules;      /**< Max number of rules. */
+	uint32_t number_tbl8s;   /**< Number of tbl8s to allocate. */
+	int flags;               /**< This field is currently unused. */
+};
+
+/** @internal Rule structure. */
+struct rte_lpm_rule_v20 {
+	uint32_t ip; /**< Rule IP address. */
+	uint8_t  next_hop; /**< Rule next hop. */
+};
+
+struct rte_lpm_rule {
+	uint32_t ip; /**< Rule IP address. */
+	uint32_t next_hop; /**< Rule next hop. */
+};
+
+/** @internal Contains metadata about the rules table. */
+struct rte_lpm_rule_info {
+	uint32_t used_rules; /**< Used rules so far. */
+	uint32_t first_rule; /**< Indexes the first rule of a given depth. */
+};
+
+/** @internal LPM structure. */
+struct rte_lpm_v20 {
+	/* LPM metadata. */
+	char name[RTE_LPM_NAMESIZE];        /**< Name of the lpm. */
+	uint32_t max_rules; /**< Max. balanced rules per lpm. */
+	struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */
+
+	/* LPM Tables. */
+	struct rte_lpm_tbl_entry_v20 tbl24[RTE_LPM_TBL24_NUM_ENTRIES]
+			__rte_cache_aligned; /**< LPM tbl24 table. */
+	struct rte_lpm_tbl_entry_v20 tbl8[RTE_LPM_TBL8_NUM_ENTRIES]
+			__rte_cache_aligned; /**< LPM tbl8 table. */
+	struct rte_lpm_rule_v20 rules_tbl[0] \
+			__rte_cache_aligned; /**< LPM rules. */
+};
+
+struct rte_lpm {
+	/* LPM metadata. */
+	char name[RTE_LPM_NAMESIZE];        /**< Name of the lpm. */
+	uint32_t max_rules; /**< Max. balanced rules per lpm. */
+	uint32_t number_tbl8s; /**< Number of tbl8s. */
+	struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */
+
+	/* LPM Tables. */
+	struct rte_lpm_tbl_entry tbl24[RTE_LPM_TBL24_NUM_ENTRIES]
+			__rte_cache_aligned; /**< LPM tbl24 table. */
+	struct rte_lpm_tbl_entry *tbl8; /**< LPM tbl8 table. */
+	struct rte_lpm_rule *rules_tbl; /**< LPM rules. */
+};
+
+/**
+ * Create an LPM object.
+ *
+ * @param name
+ *   LPM object name
+ * @param socket_id
+ *   NUMA socket ID for LPM table memory allocation
+ * @param config
+ *   Structure containing the configuration
+ * @return
+ *   Handle to LPM object on success, NULL otherwise with rte_errno set
+ *   to an appropriate values. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - invalid parameter passed to function
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_lpm *
+rte_lpm_create(const char *name, int socket_id,
+		const struct rte_lpm_config *config);
+struct rte_lpm_v20 *
+rte_lpm_create_v20(const char *name, int socket_id, int max_rules, int flags);
+struct rte_lpm *
+rte_lpm_create_v1604(const char *name, int socket_id,
+		const struct rte_lpm_config *config);
+
+/**
+ * Find an existing LPM object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the lpm object as passed to rte_lpm_create()
+ * @return
+ *   Pointer to lpm object or NULL if object not found with rte_errno
+ *   set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ */
+struct rte_lpm *
+rte_lpm_find_existing(const char *name);
+struct rte_lpm_v20 *
+rte_lpm_find_existing_v20(const char *name);
+struct rte_lpm *
+rte_lpm_find_existing_v1604(const char *name);
+
+/**
+ * Free an LPM object.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @return
+ *   None
+ */
+void
+rte_lpm_free(struct rte_lpm *lpm);
+void
+rte_lpm_free_v20(struct rte_lpm_v20 *lpm);
+void
+rte_lpm_free_v1604(struct rte_lpm *lpm);
+
+/**
+ * Add a rule to the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP of the rule to be added to the LPM table
+ * @param depth
+ *   Depth of the rule to be added to the LPM table
+ * @param next_hop
+ *   Next hop of the rule to be added to the LPM table
+ * @return
+ *   0 on success, negative value otherwise
+ */
+int
+rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint32_t next_hop);
+int
+rte_lpm_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
+		uint8_t next_hop);
+int
+rte_lpm_add_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+		uint32_t next_hop);
+
+/**
+ * Check if a rule is present in the LPM table,
+ * and provide its next hop if it is.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP of the rule to be searched
+ * @param depth
+ *   Depth of the rule to searched
+ * @param next_hop
+ *   Next hop of the rule (valid only if it is found)
+ * @return
+ *   1 if the rule exists, 0 if it does not, a negative value on failure
+ */
+int
+rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+uint32_t *next_hop);
+int
+rte_lpm_is_rule_present_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
+uint8_t *next_hop);
+int
+rte_lpm_is_rule_present_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+uint32_t *next_hop);
+
+/**
+ * Delete a rule from the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP of the rule to be deleted from the LPM table
+ * @param depth
+ *   Depth of the rule to be deleted from the LPM table
+ * @return
+ *   0 on success, negative value otherwise
+ */
+int
+rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth);
+int
+rte_lpm_delete_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth);
+int
+rte_lpm_delete_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth);
+
+/**
+ * Delete all rules from the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ */
+void
+rte_lpm_delete_all(struct rte_lpm *lpm);
+void
+rte_lpm_delete_all_v20(struct rte_lpm_v20 *lpm);
+void
+rte_lpm_delete_all_v1604(struct rte_lpm *lpm);
+
+/**
+ * Lookup an IP into the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP to be looked up in the LPM table
+ * @param next_hop
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only)
+ * @return
+ *   -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
+ */
+static inline int
+rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop)
+{
+	unsigned tbl24_index = (ip >> 8);
+	uint32_t tbl_entry;
+	const uint32_t *ptbl;
+
+	/* DEBUG: Check user input arguments. */
+	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL);
+
+	/* Copy tbl24 entry */
+	ptbl = (const uint32_t *)(&lpm->tbl24[tbl24_index]);
+	tbl_entry = *ptbl;
+
+	/* Copy tbl8 entry (only if needed) */
+	if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+
+		unsigned tbl8_index = (uint8_t)ip +
+				(((uint32_t)tbl_entry & 0x00FFFFFF) *
+						RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+
+		ptbl = (const uint32_t *)&lpm->tbl8[tbl8_index];
+		tbl_entry = *ptbl;
+	}
+
+	*next_hop = ((uint32_t)tbl_entry & 0x00FFFFFF);
+	return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT;
+}
+
+/**
+ * Lookup multiple IP addresses in an LPM table. This may be implemented as a
+ * macro, so the address of the function should not be used.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ips
+ *   Array of IPs to be looked up in the LPM table
+ * @param next_hops
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is an array of two byte values. The most significant byte in each
+ *   value says whether the lookup was successful (bitmask
+ *   RTE_LPM_LOOKUP_SUCCESS is set). The least significant byte is the
+ *   actual next hop.
+ * @param n
+ *   Number of elements in ips (and next_hops) array to lookup. This should be a
+ *   compile time constant, and divisible by 8 for best performance.
+ *  @return
+ *   -EINVAL for incorrect arguments, otherwise 0
+ */
+#define rte_lpm_lookup_bulk(lpm, ips, next_hops, n) \
+		rte_lpm_lookup_bulk_func(lpm, ips, next_hops, n)
+
+static inline int
+rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
+		uint32_t *next_hops, const unsigned n)
+{
+	unsigned i;
+	unsigned tbl24_indexes[n];
+	const uint32_t *ptbl;
+
+	/* DEBUG: Check user input arguments. */
+	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (ips == NULL) ||
+			(next_hops == NULL)), -EINVAL);
+
+	for (i = 0; i < n; i++) {
+		tbl24_indexes[i] = ips[i] >> 8;
+	}
+
+	for (i = 0; i < n; i++) {
+		/* Simply copy tbl24 entry to output */
+		ptbl = (const uint32_t *)&lpm->tbl24[tbl24_indexes[i]];
+		next_hops[i] = *ptbl;
+
+		/* Overwrite output with tbl8 entry if needed */
+		if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+				RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+
+			unsigned tbl8_index = (uint8_t)ips[i] +
+					(((uint32_t)next_hops[i] & 0x00FFFFFF) *
+					 RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+
+			ptbl = (const uint32_t *)&lpm->tbl8[tbl8_index];
+			next_hops[i] = *ptbl;
+		}
+	}
+	return 0;
+}
+
+/* Mask four results. */
+#define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ffffff00ffffff)
+
+/**
+ * Lookup four IP addresses in an LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   Four IPs to be looked up in the LPM table
+ * @param hop
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is an 4 elements array of two byte values.
+ *   If the lookup was succesfull for the given IP, then least significant byte
+ *   of the corresponding element is the  actual next hop and the most
+ *   significant byte is zero.
+ *   If the lookup for the given IP failed, then corresponding element would
+ *   contain default value, see description of then next parameter.
+ * @param defv
+ *   Default value to populate into corresponding element of hop[] array,
+ *   if lookup would fail.
+ */
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv);
+
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#include "rte_lpm_neon.h"
+#else
+#include "rte_lpm_sse.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_H_ */
diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
new file mode 100644
index 00000000..ba4353ce
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm6.c
@@ -0,0 +1,883 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_memcpy.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
+
+#include "rte_lpm6.h"
+
+#define RTE_LPM6_TBL24_NUM_ENTRIES        (1 << 24)
+#define RTE_LPM6_TBL8_GROUP_NUM_ENTRIES         256
+#define RTE_LPM6_TBL8_MAX_NUM_GROUPS      (1 << 21)
+
+#define RTE_LPM6_VALID_EXT_ENTRY_BITMASK 0xA0000000
+#define RTE_LPM6_LOOKUP_SUCCESS          0x20000000
+#define RTE_LPM6_TBL8_BITMASK            0x001FFFFF
+
+#define ADD_FIRST_BYTE                            3
+#define LOOKUP_FIRST_BYTE                         4
+#define BYTE_SIZE                                 8
+#define BYTES2_SIZE                              16
+
+#define lpm6_tbl8_gindex next_hop
+
+/** Flags for setting an entry as valid/invalid. */
+enum valid_flag {
+	INVALID = 0,
+	VALID
+};
+
+TAILQ_HEAD(rte_lpm6_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_lpm6_tailq = {
+	.name = "RTE_LPM6",
+};
+EAL_REGISTER_TAILQ(rte_lpm6_tailq)
+
+/** Tbl entry structure. It is the same for both tbl24 and tbl8 */
+struct rte_lpm6_tbl_entry {
+	uint32_t next_hop:	21;  /**< Next hop / next table to be checked. */
+	uint32_t depth	:8;      /**< Rule depth. */
+
+	/* Flags. */
+	uint32_t valid     :1;   /**< Validation flag. */
+	uint32_t valid_group :1; /**< Group validation flag. */
+	uint32_t ext_entry :1;   /**< External entry. */
+};
+
+/** Rules tbl entry structure. */
+struct rte_lpm6_rule {
+	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
+	uint8_t next_hop; /**< Rule next hop. */
+	uint8_t depth; /**< Rule depth. */
+};
+
+/** LPM6 structure. */
+struct rte_lpm6 {
+	/* LPM metadata. */
+	char name[RTE_LPM6_NAMESIZE];    /**< Name of the lpm. */
+	uint32_t max_rules;              /**< Max number of rules. */
+	uint32_t used_rules;             /**< Used rules so far. */
+	uint32_t number_tbl8s;           /**< Number of tbl8s to allocate. */
+	uint32_t next_tbl8;              /**< Next tbl8 to be used. */
+
+	/* LPM Tables. */
+	struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */
+	struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
+			__rte_cache_aligned; /**< LPM tbl24 table. */
+	struct rte_lpm6_tbl_entry tbl8[0]
+			__rte_cache_aligned; /**< LPM tbl8 table. */
+};
+
+/*
+ * Takes an array of uint8_t (IPv6 address) and masks it using the depth.
+ * It leaves untouched one bit per unit in the depth variable
+ * and set the rest to 0.
+ */
+static inline void
+mask_ip(uint8_t *ip, uint8_t depth)
+{
+        int16_t part_depth, mask;
+        int i;
+
+		part_depth = depth;
+
+		for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
+			if (part_depth < BYTE_SIZE && part_depth >= 0) {
+				mask = (uint16_t)(~(UINT8_MAX >> part_depth));
+				ip[i] = (uint8_t)(ip[i] & mask);
+			} else if (part_depth < 0) {
+				ip[i] = 0;
+			}
+			part_depth -= BYTE_SIZE;
+		}
+}
+
+/*
+ * Allocates memory for LPM object
+ */
+struct rte_lpm6 *
+rte_lpm6_create(const char *name, int socket_id,
+		const struct rte_lpm6_config *config)
+{
+	char mem_name[RTE_LPM6_NAMESIZE];
+	struct rte_lpm6 *lpm = NULL;
+	struct rte_tailq_entry *te;
+	uint64_t mem_size, rules_size;
+	struct rte_lpm6_list *lpm_list;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
+
+	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t));
+
+	/* Check user arguments. */
+	if ((name == NULL) || (socket_id < -1) || (config == NULL) ||
+			(config->max_rules == 0) ||
+			config->number_tbl8s > RTE_LPM6_TBL8_MAX_NUM_GROUPS) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
+
+	/* Determine the amount of memory to allocate. */
+	mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) *
+			RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
+	rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules;
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* Guarantee there's no existing */
+	TAILQ_FOREACH(te, lpm_list, next) {
+		lpm = (struct rte_lpm6 *) te->data;
+		if (strncmp(name, lpm->name, RTE_LPM6_NAMESIZE) == 0)
+			break;
+	}
+	lpm = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		goto exit;
+	}
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("LPM6_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n");
+		goto exit;
+	}
+
+	/* Allocate memory to store the LPM data structures. */
+	lpm = (struct rte_lpm6 *)rte_zmalloc_socket(mem_name, (size_t)mem_size,
+			RTE_CACHE_LINE_SIZE, socket_id);
+
+	if (lpm == NULL) {
+		RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
+		rte_free(te);
+		goto exit;
+	}
+
+	lpm->rules_tbl = (struct rte_lpm6_rule *)rte_zmalloc_socket(NULL,
+			(size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
+
+	if (lpm->rules_tbl == NULL) {
+		RTE_LOG(ERR, LPM, "LPM rules_tbl allocation failed\n");
+		rte_free(lpm);
+		lpm = NULL;
+		rte_free(te);
+		goto exit;
+	}
+
+	/* Save user arguments. */
+	lpm->max_rules = config->max_rules;
+	lpm->number_tbl8s = config->number_tbl8s;
+	snprintf(lpm->name, sizeof(lpm->name), "%s", name);
+
+	te->data = (void *) lpm;
+
+	TAILQ_INSERT_TAIL(lpm_list, te, next);
+
+exit:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return lpm;
+}
+
+/*
+ * Find an existing lpm table and return a pointer to it.
+ */
+struct rte_lpm6 *
+rte_lpm6_find_existing(const char *name)
+{
+	struct rte_lpm6 *l = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_lpm6_list *lpm_list;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, lpm_list, next) {
+		l = (struct rte_lpm6 *) te->data;
+		if (strncmp(name, l->name, RTE_LPM6_NAMESIZE) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return l;
+}
+
+/*
+ * Deallocates memory for given LPM table.
+ */
+void
+rte_lpm6_free(struct rte_lpm6 *lpm)
+{
+	struct rte_lpm6_list *lpm_list;
+	struct rte_tailq_entry *te;
+
+	/* Check user arguments. */
+	if (lpm == NULL)
+		return;
+
+	lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find our tailq entry */
+	TAILQ_FOREACH(te, lpm_list, next) {
+		if (te->data == (void *) lpm)
+			break;
+	}
+
+	if (te != NULL)
+		TAILQ_REMOVE(lpm_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_free(lpm->rules_tbl);
+	rte_free(lpm);
+	rte_free(te);
+}
+
+/*
+ * Checks if a rule already exists in the rules table and updates
+ * the nexthop if so. Otherwise it adds a new rule if enough space is available.
+ */
+static inline int32_t
+rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth)
+{
+	uint32_t rule_index;
+
+	/* Scan through rule list to see if rule already exists. */
+	for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
+
+		/* If rule already exists update its next_hop and return. */
+		if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
+				RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
+				lpm->rules_tbl[rule_index].depth == depth) {
+			lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+			return rule_index;
+		}
+	}
+
+	/*
+	 * If rule does not exist check if there is space to add a new rule to
+	 * this rule group. If there is no space return error.
+	 */
+	if (lpm->used_rules == lpm->max_rules) {
+		return -ENOSPC;
+	}
+
+	/* If there is space for the new rule add it. */
+	rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
+	lpm->rules_tbl[rule_index].next_hop = next_hop;
+	lpm->rules_tbl[rule_index].depth = depth;
+
+	/* Increment the used rules counter for this rule group. */
+	lpm->used_rules++;
+
+	return rule_index;
+}
+
+/*
+ * Function that expands a rule across the data structure when a less-generic
+ * one has been added before. It assures that every possible combination of bits
+ * in the IP address returns a match.
+ */
+static void
+expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth,
+		uint8_t next_hop)
+{
+	uint32_t tbl8_group_end, tbl8_gindex_next, j;
+
+	tbl8_group_end = tbl8_gindex + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+
+	struct rte_lpm6_tbl_entry new_tbl8_entry = {
+		.valid = VALID,
+		.valid_group = VALID,
+		.depth = depth,
+		.next_hop = next_hop,
+		.ext_entry = 0,
+	};
+
+	for (j = tbl8_gindex; j < tbl8_group_end; j++) {
+		if (!lpm->tbl8[j].valid || (lpm->tbl8[j].ext_entry == 0
+				&& lpm->tbl8[j].depth <= depth)) {
+
+			lpm->tbl8[j] = new_tbl8_entry;
+
+		} else if (lpm->tbl8[j].ext_entry == 1) {
+
+			tbl8_gindex_next = lpm->tbl8[j].lpm6_tbl8_gindex
+					* RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+			expand_rule(lpm, tbl8_gindex_next, depth, next_hop);
+		}
+	}
+}
+
+/*
+ * Partially adds a new route to the data structure (tbl24+tbl8s).
+ * It returns 0 on success, a negative number on failure, or 1 if
+ * the process needs to be continued by calling the function again.
+ */
+static inline int
+add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
+		struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes,
+		uint8_t first_byte, uint8_t depth, uint8_t next_hop)
+{
+	uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i;
+	int32_t tbl8_gindex;
+	int8_t bitshift;
+	uint8_t bits_covered;
+
+	/*
+	 * Calculate index to the table based on the number and position
+	 * of the bytes being inspected in this step.
+	 */
+	tbl_index = 0;
+	for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) {
+		bitshift = (int8_t)((bytes - i)*BYTE_SIZE);
+
+		if (bitshift < 0) bitshift = 0;
+		tbl_index = tbl_index | ip[i-1] << bitshift;
+	}
+
+	/* Number of bits covered in this step */
+	bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE);
+
+	/*
+	 * If depth if smaller than this number (ie this is the last step)
+	 * expand the rule across the relevant positions in the table.
+	 */
+	if (depth <= bits_covered) {
+		tbl_range = 1 << (bits_covered - depth);
+
+		for (i = tbl_index; i < (tbl_index + tbl_range); i++) {
+			if (!tbl[i].valid || (tbl[i].ext_entry == 0 &&
+					tbl[i].depth <= depth)) {
+
+				struct rte_lpm6_tbl_entry new_tbl_entry = {
+					.next_hop = next_hop,
+					.depth = depth,
+					.valid = VALID,
+					.valid_group = VALID,
+					.ext_entry = 0,
+				};
+
+				tbl[i] = new_tbl_entry;
+
+			} else if (tbl[i].ext_entry == 1) {
+
+				/*
+				 * If tbl entry is valid and extended calculate the index
+				 * into next tbl8 and expand the rule across the data structure.
+				 */
+				tbl8_gindex = tbl[i].lpm6_tbl8_gindex *
+						RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+				expand_rule(lpm, tbl8_gindex, depth, next_hop);
+			}
+		}
+
+		return 0;
+	}
+	/*
+	 * If this is not the last step just fill one position
+	 * and calculate the index to the next table.
+	 */
+	else {
+		/* If it's invalid a new tbl8 is needed */
+		if (!tbl[tbl_index].valid) {
+			if (lpm->next_tbl8 < lpm->number_tbl8s)
+				tbl8_gindex = (lpm->next_tbl8)++;
+			else
+				return -ENOSPC;
+
+			struct rte_lpm6_tbl_entry new_tbl_entry = {
+				.lpm6_tbl8_gindex = tbl8_gindex,
+				.depth = 0,
+				.valid = VALID,
+				.valid_group = VALID,
+				.ext_entry = 1,
+			};
+
+			tbl[tbl_index] = new_tbl_entry;
+		}
+		/*
+		 * If it's valid but not extended the rule that was stored *
+		 * here needs to be moved to the next table.
+		 */
+		else if (tbl[tbl_index].ext_entry == 0) {
+			/* Search for free tbl8 group. */
+			if (lpm->next_tbl8 < lpm->number_tbl8s)
+				tbl8_gindex = (lpm->next_tbl8)++;
+			else
+				return -ENOSPC;
+
+			tbl8_group_start = tbl8_gindex *
+					RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+			tbl8_group_end = tbl8_group_start +
+					RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+
+			/* Populate new tbl8 with tbl value. */
+			for (i = tbl8_group_start; i < tbl8_group_end; i++) {
+				lpm->tbl8[i].valid = VALID;
+				lpm->tbl8[i].depth = tbl[tbl_index].depth;
+				lpm->tbl8[i].next_hop = tbl[tbl_index].next_hop;
+				lpm->tbl8[i].ext_entry = 0;
+			}
+
+			/*
+			 * Update tbl entry to point to new tbl8 entry. Note: The
+			 * ext_flag and tbl8_index need to be updated simultaneously,
+			 * so assign whole structure in one go.
+			 */
+			struct rte_lpm6_tbl_entry new_tbl_entry = {
+				.lpm6_tbl8_gindex = tbl8_gindex,
+				.depth = 0,
+				.valid = VALID,
+				.valid_group = VALID,
+				.ext_entry = 1,
+			};
+
+			tbl[tbl_index] = new_tbl_entry;
+		}
+
+		*tbl_next = &(lpm->tbl8[tbl[tbl_index].lpm6_tbl8_gindex *
+				RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]);
+	}
+
+	return 1;
+}
+
+/*
+ * Add a route
+ */
+int
+rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+		uint8_t next_hop)
+{
+	struct rte_lpm6_tbl_entry *tbl;
+	struct rte_lpm6_tbl_entry *tbl_next;
+	int32_t rule_index;
+	int status;
+	uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+	int i;
+
+	/* Check user arguments. */
+	if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH))
+		return -EINVAL;
+
+	/* Copy the IP and mask it to avoid modifying user's input data. */
+	memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
+	mask_ip(masked_ip, depth);
+
+	/* Add the rule to the rule table. */
+	rule_index = rule_add(lpm, masked_ip, next_hop, depth);
+
+	/* If there is no space available for new rule return error. */
+	if (rule_index < 0) {
+		return rule_index;
+	}
+
+	/* Inspect the first three bytes through tbl24 on the first step. */
+	tbl = lpm->tbl24;
+	status = add_step (lpm, tbl, &tbl_next, masked_ip, ADD_FIRST_BYTE, 1,
+			depth, next_hop);
+	if (status < 0) {
+		rte_lpm6_delete(lpm, masked_ip, depth);
+
+		return status;
+	}
+
+	/*
+	 * Inspect one by one the rest of the bytes until
+	 * the process is completed.
+	 */
+	for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && status == 1; i++) {
+		tbl = tbl_next;
+		status = add_step (lpm, tbl, &tbl_next, masked_ip, 1, (uint8_t)(i+1),
+				depth, next_hop);
+		if (status < 0) {
+			rte_lpm6_delete(lpm, masked_ip, depth);
+
+			return status;
+		}
+	}
+
+	return status;
+}
+
+/*
+ * Takes a pointer to a table entry and inspect one level.
+ * The function returns 0 on lookup success, ENOENT if no match was found
+ * or 1 if the process needs to be continued by calling the function again.
+ */
+static inline int
+lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl,
+		const struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip,
+		uint8_t first_byte, uint8_t *next_hop)
+{
+	uint32_t tbl8_index, tbl_entry;
+
+	/* Take the integer value from the pointer. */
+	tbl_entry = *(const uint32_t *)tbl;
+
+	/* If it is valid and extended we calculate the new pointer to return. */
+	if ((tbl_entry & RTE_LPM6_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM6_VALID_EXT_ENTRY_BITMASK) {
+
+		tbl8_index = ip[first_byte-1] +
+				((tbl_entry & RTE_LPM6_TBL8_BITMASK) *
+				RTE_LPM6_TBL8_GROUP_NUM_ENTRIES);
+
+		*tbl_next = &lpm->tbl8[tbl8_index];
+
+		return 1;
+	} else {
+		/* If not extended then we can have a match. */
+		*next_hop = (uint8_t)tbl_entry;
+		return (tbl_entry & RTE_LPM6_LOOKUP_SUCCESS) ? 0 : -ENOENT;
+	}
+}
+
+/*
+ * Looks up an IP
+ */
+int
+rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop)
+{
+	const struct rte_lpm6_tbl_entry *tbl;
+	const struct rte_lpm6_tbl_entry *tbl_next;
+	int status;
+	uint8_t first_byte;
+	uint32_t tbl24_index;
+
+	/* DEBUG: Check user input arguments. */
+	if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL)) {
+		return -EINVAL;
+	}
+
+	first_byte = LOOKUP_FIRST_BYTE;
+	tbl24_index = (ip[0] << BYTES2_SIZE) | (ip[1] << BYTE_SIZE) | ip[2];
+
+	/* Calculate pointer to the first entry to be inspected */
+	tbl = &lpm->tbl24[tbl24_index];
+
+	do {
+		/* Continue inspecting following levels until success or failure */
+		status = lookup_step(lpm, tbl, &tbl_next, ip, first_byte++, next_hop);
+		tbl = tbl_next;
+	} while (status == 1);
+
+	return status;
+}
+
+/*
+ * Looks up a group of IP addresses
+ */
+int
+rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
+		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
+		int16_t * next_hops, unsigned n)
+{
+	unsigned i;
+	const struct rte_lpm6_tbl_entry *tbl;
+	const struct rte_lpm6_tbl_entry *tbl_next;
+	uint32_t tbl24_index;
+	uint8_t first_byte, next_hop;
+	int status;
+
+	/* DEBUG: Check user input arguments. */
+	if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) {
+		return -EINVAL;
+	}
+
+	for (i = 0; i < n; i++) {
+		first_byte = LOOKUP_FIRST_BYTE;
+		tbl24_index = (ips[i][0] << BYTES2_SIZE) |
+				(ips[i][1] << BYTE_SIZE) | ips[i][2];
+
+		/* Calculate pointer to the first entry to be inspected */
+		tbl = &lpm->tbl24[tbl24_index];
+
+		do {
+			/* Continue inspecting following levels until success or failure */
+			status = lookup_step(lpm, tbl, &tbl_next, ips[i], first_byte++,
+					&next_hop);
+			tbl = tbl_next;
+		} while (status == 1);
+
+		if (status < 0)
+			next_hops[i] = -1;
+		else
+			next_hops[i] = next_hop;
+	}
+
+	return 0;
+}
+
+/*
+ * Finds a rule in rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
+ */
+static inline int32_t
+rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
+{
+	uint32_t rule_index;
+
+	/* Scan used rules at given depth to find rule. */
+	for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
+		/* If rule is found return the rule index. */
+		if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
+				RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
+				lpm->rules_tbl[rule_index].depth == depth) {
+
+			return rule_index;
+		}
+	}
+
+	/* If rule is not found return -ENOENT. */
+	return -ENOENT;
+}
+
+/*
+ * Look for a rule in the high-level rules table
+ */
+int
+rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+uint8_t *next_hop)
+{
+	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
+	int32_t rule_index;
+
+	/* Check user arguments. */
+	if ((lpm == NULL) || next_hop == NULL || ip == NULL ||
+			(depth < 1) || (depth > RTE_LPM6_MAX_DEPTH))
+		return -EINVAL;
+
+	/* Copy the IP and mask it to avoid modifying user's input data. */
+	memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
+	mask_ip(ip_masked, depth);
+
+	/* Look for the rule using rule_find. */
+	rule_index = rule_find(lpm, ip_masked, depth);
+
+	if (rule_index >= 0) {
+		*next_hop = lpm->rules_tbl[rule_index].next_hop;
+		return 1;
+	}
+
+	/* If rule is not found return 0. */
+	return 0;
+}
+
+/*
+ * Delete a rule from the rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
+ */
+static inline void
+rule_delete(struct rte_lpm6 *lpm, int32_t rule_index)
+{
+	/*
+	 * Overwrite redundant rule with last rule in group and decrement rule
+	 * counter.
+	 */
+	lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1];
+	lpm->used_rules--;
+}
+
+/*
+ * Deletes a rule
+ */
+int
+rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
+{
+	int32_t rule_to_delete_index;
+	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
+	unsigned i;
+
+	/*
+	 * Check input arguments.
+	 */
+	if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) {
+		return -EINVAL;
+	}
+
+	/* Copy the IP and mask it to avoid modifying user's input data. */
+	memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
+	mask_ip(ip_masked, depth);
+
+	/*
+	 * Find the index of the input rule, that needs to be deleted, in the
+	 * rule table.
+	 */
+	rule_to_delete_index = rule_find(lpm, ip_masked, depth);
+
+	/*
+	 * Check if rule_to_delete_index was found. If no rule was found the
+	 * function rule_find returns -ENOENT.
+	 */
+	if (rule_to_delete_index < 0)
+		return rule_to_delete_index;
+
+	/* Delete the rule from the rule table. */
+	rule_delete(lpm, rule_to_delete_index);
+
+	/*
+	 * Set all the table entries to 0 (ie delete every rule
+	 * from the data structure.
+	 */
+	lpm->next_tbl8 = 0;
+	memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+	memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
+			* RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+
+	/*
+	 * Add every rule again (except for the one that was removed from
+	 * the rules table).
+	 */
+	for (i = 0; i < lpm->used_rules; i++) {
+		rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
+				lpm->rules_tbl[i].next_hop);
+	}
+
+	return 0;
+}
+
+/*
+ * Deletes a group of rules
+ */
+int
+rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
+		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n)
+{
+	int32_t rule_to_delete_index;
+	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
+	unsigned i;
+
+	/*
+	 * Check input arguments.
+	 */
+	if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) {
+		return -EINVAL;
+	}
+
+	for (i = 0; i < n; i++) {
+		/* Copy the IP and mask it to avoid modifying user's input data. */
+		memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE);
+		mask_ip(ip_masked, depths[i]);
+
+		/*
+		 * Find the index of the input rule, that needs to be deleted, in the
+		 * rule table.
+		 */
+		rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]);
+
+		/*
+		 * Check if rule_to_delete_index was found. If no rule was found the
+		 * function rule_find returns -ENOENT.
+		 */
+		if (rule_to_delete_index < 0)
+			continue;
+
+		/* Delete the rule from the rule table. */
+		rule_delete(lpm, rule_to_delete_index);
+	}
+
+	/*
+	 * Set all the table entries to 0 (ie delete every rule
+	 * from the data structure.
+	 */
+	lpm->next_tbl8 = 0;
+	memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+	memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
+			* RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+
+	/*
+	 * Add every rule again (except for the ones that were removed from
+	 * the rules table).
+	 */
+	for (i = 0; i < lpm->used_rules; i++) {
+		rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
+				lpm->rules_tbl[i].next_hop);
+	}
+
+	return 0;
+}
+
+/*
+ * Delete all rules from the LPM table.
+ */
+void
+rte_lpm6_delete_all(struct rte_lpm6 *lpm)
+{
+	/* Zero used rules counter. */
+	lpm->used_rules = 0;
+
+	/* Zero next tbl8 index. */
+	lpm->next_tbl8 = 0;
+
+	/* Zero tbl24. */
+	memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+
+	/* Zero tbl8. */
+	memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) *
+			RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+
+	/* Delete all rules form the rules table. */
+	memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules);
+}
diff --git a/lib/librte_lpm/rte_lpm6.h b/lib/librte_lpm/rte_lpm6.h
new file mode 100644
index 00000000..cedcea8d
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm6.h
@@ -0,0 +1,227 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _RTE_LPM6_H_
+#define _RTE_LPM6_H_
+
+/**
+ * @file
+ * RTE Longest Prefix Match for IPv6 (LPM6)
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define RTE_LPM6_MAX_DEPTH               128
+#define RTE_LPM6_IPV6_ADDR_SIZE           16
+/** Max number of characters in LPM name. */
+#define RTE_LPM6_NAMESIZE                 32
+
+/** LPM structure. */
+struct rte_lpm6;
+
+/** LPM configuration structure. */
+struct rte_lpm6_config {
+	uint32_t max_rules;      /**< Max number of rules. */
+	uint32_t number_tbl8s;   /**< Number of tbl8s to allocate. */
+	int flags;               /**< This field is currently unused. */
+};
+
+/**
+ * Create an LPM object.
+ *
+ * @param name
+ *   LPM object name
+ * @param socket_id
+ *   NUMA socket ID for LPM table memory allocation
+ * @param config
+ *   Structure containing the configuration
+ * @return
+ *   Handle to LPM object on success, NULL otherwise with rte_errno set
+ *   to an appropriate values. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - invalid parameter passed to function
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_lpm6 *
+rte_lpm6_create(const char *name, int socket_id,
+		const struct rte_lpm6_config *config);
+
+/**
+ * Find an existing LPM object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the lpm object as passed to rte_lpm6_create()
+ * @return
+ *   Pointer to lpm object or NULL if object not found with rte_errno
+ *   set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ */
+struct rte_lpm6 *
+rte_lpm6_find_existing(const char *name);
+
+/**
+ * Free an LPM object.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @return
+ *   None
+ */
+void
+rte_lpm6_free(struct rte_lpm6 *lpm);
+
+/**
+ * Add a rule to the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP of the rule to be added to the LPM table
+ * @param depth
+ *   Depth of the rule to be added to the LPM table
+ * @param next_hop
+ *   Next hop of the rule to be added to the LPM table
+ * @return
+ *   0 on success, negative value otherwise
+ */
+int
+rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+		uint8_t next_hop);
+
+/**
+ * Check if a rule is present in the LPM table,
+ * and provide its next hop if it is.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP of the rule to be searched
+ * @param depth
+ *   Depth of the rule to searched
+ * @param next_hop
+ *   Next hop of the rule (valid only if it is found)
+ * @return
+ *   1 if the rule exists, 0 if it does not, a negative value on failure
+ */
+int
+rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+uint8_t *next_hop);
+
+/**
+ * Delete a rule from the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP of the rule to be deleted from the LPM table
+ * @param depth
+ *   Depth of the rule to be deleted from the LPM table
+ * @return
+ *   0 on success, negative value otherwise
+ */
+int
+rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth);
+
+/**
+ * Delete a rule from the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ips
+ *   Array of IPs to be deleted from the LPM table
+ * @param depths
+ *   Array of depths of the rules to be deleted from the LPM table
+ * @param n
+ *   Number of rules to be deleted from the LPM table
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+int
+rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
+		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n);
+
+/**
+ * Delete all rules from the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ */
+void
+rte_lpm6_delete_all(struct rte_lpm6 *lpm);
+
+/**
+ * Lookup an IP into the LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   IP to be looked up in the LPM table
+ * @param next_hop
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only)
+ * @return
+ *   -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
+ */
+int
+rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop);
+
+/**
+ * Lookup multiple IP addresses in an LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ips
+ *   Array of IPs to be looked up in the LPM table
+ * @param next_hops
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is an array of two byte values. The next hop will be stored on
+ *   each position on success; otherwise the position will be set to -1.
+ * @param n
+ *   Number of elements in ips (and next_hops) array to lookup.
+ *  @return
+ *   -EINVAL for incorrect arguments, otherwise 0
+ */
+int
+rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
+		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
+		int16_t * next_hops, unsigned n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
new file mode 100644
index 00000000..7c643159
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_neon.h
@@ -0,0 +1,153 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Cavium Networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Derived rte_lpm_lookupx4 implementation from lib/librte_lpm/rte_lpm_sse.h
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium Networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LPM_NEON_H_
+#define _RTE_LPM_NEON_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	uint32x4_t i24;
+	rte_xmm_t i8;
+	uint32_t tbl[4];
+	uint64_t idx, pt, pt2;
+	const uint32_t *ptbl;
+
+	const uint32_t mask = UINT8_MAX;
+	const int32x4_t mask8 = vdupq_n_s32(mask);
+
+	/*
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+	 * as one 64-bit value (0x0300000003000000).
+	 */
+	const uint64_t mask_xv =
+		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+	/*
+	 * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+	 * as one 64-bit value (0x0100000001000000).
+	 */
+	const uint64_t mask_v =
+		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+	/* get 4 indexes for tbl24[]. */
+	i24 = vshrq_n_u32((uint32x4_t)ip, CHAR_BIT);
+
+	/* extract values from tbl24[] */
+	idx = vgetq_lane_u64((uint64x2_t)i24, 0);
+
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx];
+	tbl[0] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32];
+	tbl[1] = *ptbl;
+
+	idx = vgetq_lane_u64((uint64x2_t)i24, 1);
+
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx];
+	tbl[2] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32];
+	tbl[3] = *ptbl;
+
+	/* get 4 indexes for tbl8[]. */
+	i8.x = vandq_s32(ip, mask8);
+
+	pt = (uint64_t)tbl[0] |
+		(uint64_t)tbl[1] << 32;
+	pt2 = (uint64_t)tbl[2] |
+		(uint64_t)tbl[3] << 32;
+
+	/* search successfully finished for all 4 IP addresses. */
+	if (likely((pt & mask_xv) == mask_v) &&
+			likely((pt2 & mask_xv) == mask_v)) {
+		*(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES;
+		*(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES;
+		return;
+	}
+
+	if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[0] = i8.u32[0] +
+			(uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+		tbl[0] = *ptbl;
+	}
+	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[1] = i8.u32[1] +
+			(uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+		tbl[1] = *ptbl;
+	}
+	if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[2] = i8.u32[2] +
+			(uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+		tbl[2] = *ptbl;
+	}
+	if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[3] = i8.u32[3] +
+			(uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+		tbl[3] = *ptbl;
+	}
+
+	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_NEON_H_ */
diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
new file mode 100644
index 00000000..da830995
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_sse.h
@@ -0,0 +1,149 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_LPM_SSE_H_
+#define _RTE_LPM_SSE_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	__m128i i24;
+	rte_xmm_t i8;
+	uint32_t tbl[4];
+	uint64_t idx, pt, pt2;
+	const uint32_t *ptbl;
+
+	const __m128i mask8 =
+		_mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX);
+
+	/*
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+	 * as one 64-bit value (0x0300000003000000).
+	 */
+	const uint64_t mask_xv =
+		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+	/*
+	 * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+	 * as one 64-bit value (0x0100000001000000).
+	 */
+	const uint64_t mask_v =
+		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+	/* get 4 indexes for tbl24[]. */
+	i24 = _mm_srli_epi32(ip, CHAR_BIT);
+
+	/* extract values from tbl24[] */
+	idx = _mm_cvtsi128_si64(i24);
+	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
+
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx];
+	tbl[0] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32];
+	tbl[1] = *ptbl;
+
+	idx = _mm_cvtsi128_si64(i24);
+
+	ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx];
+	tbl[2] = *ptbl;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32];
+	tbl[3] = *ptbl;
+
+	/* get 4 indexes for tbl8[]. */
+	i8.x = _mm_and_si128(ip, mask8);
+
+	pt = (uint64_t)tbl[0] |
+		(uint64_t)tbl[1] << 32;
+	pt2 = (uint64_t)tbl[2] |
+		(uint64_t)tbl[3] << 32;
+
+	/* search successfully finished for all 4 IP addresses. */
+	if (likely((pt & mask_xv) == mask_v) &&
+			likely((pt2 & mask_xv) == mask_v)) {
+		*(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES;
+		*(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES;
+		return;
+	}
+
+	if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[0] = i8.u32[0] +
+			(uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+		tbl[0] = *ptbl;
+	}
+	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[1] = i8.u32[1] +
+			(uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+		tbl[1] = *ptbl;
+	}
+	if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[2] = i8.u32[2] +
+			(uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+		tbl[2] = *ptbl;
+	}
+	if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[3] = i8.u32[3] +
+			(uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+		tbl[3] = *ptbl;
+	}
+
+	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_SSE_H_ */
diff --git a/lib/librte_lpm/rte_lpm_version.map b/lib/librte_lpm/rte_lpm_version.map
new file mode 100644
index 00000000..239b371e
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_version.map
@@ -0,0 +1,36 @@
+DPDK_2.0 {
+	global:
+
+	rte_lpm_add;
+	rte_lpm_create;
+	rte_lpm_delete;
+	rte_lpm_delete_all;
+	rte_lpm_find_existing;
+	rte_lpm_free;
+	rte_lpm_is_rule_present;
+	rte_lpm6_add;
+	rte_lpm6_create;
+	rte_lpm6_delete;
+	rte_lpm6_delete_all;
+	rte_lpm6_delete_bulk_func;
+	rte_lpm6_find_existing;
+	rte_lpm6_free;
+	rte_lpm6_is_rule_present;
+	rte_lpm6_lookup;
+	rte_lpm6_lookup_bulk_func;
+
+	local: *;
+};
+
+DPDK_16.04 {
+	global:
+
+	rte_lpm_add;
+	rte_lpm_find_existing;
+	rte_lpm_create;
+	rte_lpm_free;
+	rte_lpm_is_rule_present;
+	rte_lpm_delete;
+	rte_lpm_delete_all;
+
+} DPDK_2.0;
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
new file mode 100644
index 00000000..8d62b0d3
--- /dev/null
+++ b/lib/librte_mbuf/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_mbuf.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_mbuf_version.map
+
+LIBABIVER := 2
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_MBUF) += lib/librte_eal lib/librte_mempool
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
new file mode 100644
index 00000000..dc0467c9
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -0,0 +1,288 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <ctype.h>
+#include <sys/queue.h>
+
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+#include <rte_hexdump.h>
+#include <rte_errno.h>
+
+/*
+ * ctrlmbuf constructor, given as a callback function to
+ * rte_mempool_create()
+ */
+void
+rte_ctrlmbuf_init(struct rte_mempool *mp,
+		__attribute__((unused)) void *opaque_arg,
+		void *_m,
+		__attribute__((unused)) unsigned i)
+{
+	struct rte_mbuf *m = _m;
+	rte_pktmbuf_init(mp, opaque_arg, _m, i);
+	m->ol_flags |= CTRL_MBUF_FLAG;
+}
+
+/*
+ * pktmbuf pool constructor, given as a callback function to
+ * rte_mempool_create()
+ */
+void
+rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
+{
+	struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv;
+	struct rte_pktmbuf_pool_private default_mbp_priv;
+	uint16_t roomsz;
+
+	RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));
+
+	/* if no structure is provided, assume no mbuf private area */
+	user_mbp_priv = opaque_arg;
+	if (user_mbp_priv == NULL) {
+		default_mbp_priv.mbuf_priv_size = 0;
+		if (mp->elt_size > sizeof(struct rte_mbuf))
+			roomsz = mp->elt_size - sizeof(struct rte_mbuf);
+		else
+			roomsz = 0;
+		default_mbp_priv.mbuf_data_room_size = roomsz;
+		user_mbp_priv = &default_mbp_priv;
+	}
+
+	RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) +
+		user_mbp_priv->mbuf_data_room_size +
+		user_mbp_priv->mbuf_priv_size);
+
+	mbp_priv = rte_mempool_get_priv(mp);
+	memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv));
+}
+
+/*
+ * pktmbuf constructor, given as a callback function to
+ * rte_mempool_create().
+ * Set the fields of a packet mbuf to their default values.
+ */
+void
+rte_pktmbuf_init(struct rte_mempool *mp,
+		 __attribute__((unused)) void *opaque_arg,
+		 void *_m,
+		 __attribute__((unused)) unsigned i)
+{
+	struct rte_mbuf *m = _m;
+	uint32_t mbuf_size, buf_len, priv_size;
+
+	priv_size = rte_pktmbuf_priv_size(mp);
+	mbuf_size = sizeof(struct rte_mbuf) + priv_size;
+	buf_len = rte_pktmbuf_data_room_size(mp);
+
+	RTE_MBUF_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size);
+	RTE_MBUF_ASSERT(mp->elt_size >= mbuf_size);
+	RTE_MBUF_ASSERT(buf_len <= UINT16_MAX);
+
+	memset(m, 0, mp->elt_size);
+
+	/* start of buffer is after mbuf structure and priv data */
+	m->priv_size = priv_size;
+	m->buf_addr = (char *)m + mbuf_size;
+	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
+	m->buf_len = (uint16_t)buf_len;
+
+	/* keep some headroom between start of buffer and data */
+	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+
+	/* init some constant fields */
+	m->pool = mp;
+	m->nb_segs = 1;
+	m->port = 0xff;
+}
+
+/* helper to create a mbuf pool */
+struct rte_mempool *
+rte_pktmbuf_pool_create(const char *name, unsigned n,
+	unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
+	int socket_id)
+{
+	struct rte_pktmbuf_pool_private mbp_priv;
+	unsigned elt_size;
+
+	if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
+		RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n",
+			priv_size);
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size +
+		(unsigned)data_room_size;
+	mbp_priv.mbuf_data_room_size = data_room_size;
+	mbp_priv.mbuf_priv_size = priv_size;
+
+	return rte_mempool_create(name, n, elt_size,
+		cache_size, sizeof(struct rte_pktmbuf_pool_private),
+		rte_pktmbuf_pool_init, &mbp_priv, rte_pktmbuf_init, NULL,
+		socket_id, 0);
+}
+
+/* do some sanity checks on a mbuf: panic if it fails */
+void
+rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header)
+{
+	const struct rte_mbuf *m_seg;
+	unsigned nb_segs;
+
+	if (m == NULL)
+		rte_panic("mbuf is NULL\n");
+
+	/* generic checks */
+	if (m->pool == NULL)
+		rte_panic("bad mbuf pool\n");
+	if (m->buf_physaddr == 0)
+		rte_panic("bad phys addr\n");
+	if (m->buf_addr == NULL)
+		rte_panic("bad virt addr\n");
+
+	uint16_t cnt = rte_mbuf_refcnt_read(m);
+	if ((cnt == 0) || (cnt == UINT16_MAX))
+		rte_panic("bad ref cnt\n");
+
+	/* nothing to check for sub-segments */
+	if (is_header == 0)
+		return;
+
+	nb_segs = m->nb_segs;
+	m_seg = m;
+	while (m_seg && nb_segs != 0) {
+		m_seg = m_seg->next;
+		nb_segs--;
+	}
+	if (nb_segs != 0)
+		rte_panic("bad nb_segs\n");
+}
+
+/* dump a mbuf on console */
+void
+rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
+{
+	unsigned int len;
+	unsigned nb_segs;
+
+	__rte_mbuf_sanity_check(m, 1);
+
+	fprintf(f, "dump mbuf at 0x%p, phys=%"PRIx64", buf_len=%u\n",
+	       m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len);
+	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
+	       "in_port=%u\n", m->pkt_len, m->ol_flags,
+	       (unsigned)m->nb_segs, (unsigned)m->port);
+	nb_segs = m->nb_segs;
+
+	while (m && nb_segs != 0) {
+		__rte_mbuf_sanity_check(m, 0);
+
+		fprintf(f, "  segment at 0x%p, data=0x%p, data_len=%u\n",
+			m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len);
+		len = dump_len;
+		if (len > m->data_len)
+			len = m->data_len;
+		if (len != 0)
+			rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len);
+		dump_len -= len;
+		m = m->next;
+		nb_segs --;
+	}
+}
+
+/*
+ * Get the name of a RX offload flag. Must be kept synchronized with flag
+ * definitions in rte_mbuf.h.
+ */
+const char *rte_get_rx_ol_flag_name(uint64_t mask)
+{
+	switch (mask) {
+	case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT";
+	case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
+	case PKT_RX_FDIR: return "PKT_RX_FDIR";
+	case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
+	case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
+	case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
+	/* case PKT_RX_OVERSIZE: return "PKT_RX_OVERSIZE"; */
+	/* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
+	/* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
+	/* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
+	case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
+	case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+	default: return NULL;
+	}
+}
+
+/*
+ * Get the name of a TX offload flag. Must be kept synchronized with flag
+ * definitions in rte_mbuf.h.
+ */
+const char *rte_get_tx_ol_flag_name(uint64_t mask)
+{
+	switch (mask) {
+	case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT";
+	case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM";
+	case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM";
+	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
+	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
+	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
+	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
+	case PKT_TX_IPV4: return "PKT_TX_IPV4";
+	case PKT_TX_IPV6: return "PKT_TX_IPV6";
+	case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM";
+	case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4";
+	case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6";
+	default: return NULL;
+	}
+}
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
new file mode 100644
index 00000000..75a227d8
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -0,0 +1,1946 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MBUF_H_
+#define _RTE_MBUF_H_
+
+/**
+ * @file
+ * RTE Mbuf
+ *
+ * The mbuf library provides the ability to create and destroy buffers
+ * that may be used by the RTE application to store message
+ * buffers. The message buffers are stored in a mempool, using the
+ * RTE mempool library.
+ *
+ * This library provide an API to allocate/free packet mbufs, which are
+ * used to carry network packets.
+ *
+ * To understand the concepts of packet buffers or mbufs, you
+ * should read "TCP/IP Illustrated, Volume 2: The Implementation,
+ * Addison-Wesley, 1995, ISBN 0-201-63354-X from Richard Stevens"
+ * http://www.kohala.com/start/tcpipiv2.html
+ */
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_mempool.h>
+#include <rte_memory.h>
+#include <rte_atomic.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* deprecated options */
+#pragma GCC poison RTE_MBUF_SCATTER_GATHER
+#pragma GCC poison RTE_MBUF_REFCNT
+
+/*
+ * Packet Offload Features Flags. It also carry packet type information.
+ * Critical resources. Both rx/tx shared these bits. Be cautious on any change
+ *
+ * - RX flags start at bit position zero, and get added to the left of previous
+ *   flags.
+ * - The most-significant 3 bits are reserved for generic mbuf flags
+ * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get
+ *   added to the right of the previously defined flags i.e. they should count
+ *   downwards, not upwards.
+ *
+ * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
+ * rte_get_tx_ol_flag_name().
+ */
+#define PKT_RX_VLAN_PKT      (1ULL << 0)  /**< RX packet is a 802.1q VLAN packet. */
+#define PKT_RX_RSS_HASH      (1ULL << 1)  /**< RX packet with RSS hash result. */
+#define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
+#define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
+#define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
+#define PKT_RX_EIP_CKSUM_BAD (1ULL << 5)  /**< External IP header checksum error. */
+#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
+#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
+#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
+#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
+#define PKT_RX_IEEE1588_PTP  (1ULL << 9)  /**< RX IEEE1588 L2 Ethernet PT Packet. */
+#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
+#define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
+#define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
+#define PKT_RX_QINQ_PKT      (1ULL << 15)  /**< RX packet with double VLAN stripped. */
+/* add new RX flags here */
+
+/* add new TX flags here */
+
+/**
+ * Second VLAN insertion (QinQ) flag.
+ */
+#define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
+
+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ *    PKT_TX_TCP_CKSUM)
+ *  - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
+ *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
+ *    to 0 in the packet
+ *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ *  - calculate the pseudo header checksum without taking ip_len in account,
+ *    and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
+ *    rte_ipv6_phdr_cksum() that can be used as helpers.
+ */
+#define PKT_TX_TCP_SEG       (1ULL << 50)
+
+#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
+
+/**
+ * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
+ * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
+ * L4 checksum offload, the user needs to:
+ *  - fill l2_len and l3_len in mbuf
+ *  - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
+ *  - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
+ *  - calculate the pseudo header checksum and set it in the L4 header (only
+ *    for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
+ *    For SCTP, set the crc field to 0.
+ */
+#define PKT_TX_L4_NO_CKSUM   (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
+#define PKT_TX_TCP_CKSUM     (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_SCTP_CKSUM    (2ULL << 52) /**< SCTP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_UDP_CKSUM     (3ULL << 52) /**< UDP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_L4_MASK       (3ULL << 52) /**< Mask for L4 cksum offload request. */
+
+/**
+ * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
+ * also be set by the application, although a PMD will only check
+ * PKT_TX_IP_CKSUM.
+ *  - set the IP checksum field in the packet to 0
+ *  - fill the mbuf offload information: l2_len, l3_len
+ */
+#define PKT_TX_IP_CKSUM      (1ULL << 54)
+
+/**
+ * Packet is IPv4. This flag must be set when using any offload feature
+ * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
+ * packet. If the packet is a tunneled packet, this flag is related to
+ * the inner headers.
+ */
+#define PKT_TX_IPV4          (1ULL << 55)
+
+/**
+ * Packet is IPv6. This flag must be set when using an offload feature
+ * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
+ * packet. If the packet is a tunneled packet, this flag is related to
+ * the inner headers.
+ */
+#define PKT_TX_IPV6          (1ULL << 56)
+
+#define PKT_TX_VLAN_PKT      (1ULL << 57) /**< TX packet is a 802.1q VLAN packet. */
+
+/**
+ * Offload the IP checksum of an external header in the hardware. The
+ * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh
+ * a PMD will only check PKT_TX_IP_CKSUM.  The IP checksum field in the
+ * packet must be set to 0.
+ *  - set the outer IP checksum field in the packet to 0
+ *  - fill the mbuf offload information: outer_l2_len, outer_l3_len
+ */
+#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
+
+/**
+ * Packet outer header is IPv4. This flag must be set when using any
+ * outer offload feature (L3 or L4 checksum) to tell the NIC that the
+ * outer header of the tunneled packet is an IPv4 packet.
+ */
+#define PKT_TX_OUTER_IPV4   (1ULL << 59)
+
+/**
+ * Packet outer header is IPv6. This flag must be set when using any
+ * outer offload feature (L4 checksum) to tell the NIC that the outer
+ * header of the tunneled packet is an IPv6 packet.
+ */
+#define PKT_TX_OUTER_IPV6    (1ULL << 60)
+
+#define __RESERVED           (1ULL << 61) /**< reserved for future mbuf use */
+
+#define IND_ATTACHED_MBUF    (1ULL << 62) /**< Indirect attached mbuf */
+
+/* Use final bit of flags to indicate a control mbuf */
+#define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
+
+/*
+ * 32 bits are divided into several fields to mark packet types. Note that
+ * each field is indexical.
+ * - Bit 3:0 is for L2 types.
+ * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
+ * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
+ * - Bit 15:12 is for tunnel types.
+ * - Bit 19:16 is for inner L2 types.
+ * - Bit 23:20 is for inner L3 types.
+ * - Bit 27:24 is for inner L4 types.
+ * - Bit 31:28 is reserved.
+ *
+ * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
+ * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
+ * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
+ *
+ * Note that L3 types values are selected for checking IPV4/IPV6 header from
+ * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
+ * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
+ *
+ * Note that the packet types of the same packet recognized by different
+ * hardware may be different, as different hardware may have different
+ * capability of packet type recognition.
+ *
+ * examples:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=0x29
+ * | 'version'=6, 'next header'=0x3A
+ * | 'ICMPv6 header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_IP |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_ICMP.
+ *
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x2F
+ * | 'GRE header'
+ * | 'version'=6, 'next header'=0x11
+ * | 'UDP header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_GRENAT |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_UDP.
+ */
+#define RTE_PTYPE_UNKNOWN                   0x00000000
+/**
+ * Ethernet packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=[0x0800|0x86DD]>
+ */
+#define RTE_PTYPE_L2_ETHER                  0x00000001
+/**
+ * Ethernet packet type for time sync.
+ *
+ * Packet format:
+ * <'ether type'=0x88F7>
+ */
+#define RTE_PTYPE_L2_ETHER_TIMESYNC         0x00000002
+/**
+ * ARP (Address Resolution Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0806>
+ */
+#define RTE_PTYPE_L2_ETHER_ARP              0x00000003
+/**
+ * LLDP (Link Layer Discovery Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x88CC>
+ */
+#define RTE_PTYPE_L2_ETHER_LLDP             0x00000004
+/**
+ * Mask of layer 2 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L2_MASK                   0x0000000f
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * header option.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_L3_IPV4                   0x00000010
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and contains header
+ * options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT               0x00000030
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * extension header.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_L3_IPV6                   0x00000040
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and may or maynot contain
+ * header options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00000090
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and contains extension
+ * headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   'extension headers'>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT               0x000000c0
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and may or maynot contain
+ * extension headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   <'extension headers'>>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x000000e0
+/**
+ * Mask of layer 3 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L3_MASK                   0x000000f0
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_L4_TCP                    0x00000100
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_L4_UDP                    0x00000200
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which can be recognized as
+ * fragmented. A fragmented packet cannot be recognized as any other L4 types
+ * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
+ * RTE_PTYPE_L4_NONFRAG).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_L4_FRAG                   0x00000300
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_L4_SCTP                   0x00000400
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_L4_ICMP                   0x00000500
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, while cannot be recognized as
+ * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
+ * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_L4_NONFRAG                0x00000600
+/**
+ * Mask of layer 4 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L4_MASK                   0x00000f00
+/**
+ * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=[4|41]>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[4|41]>
+ */
+#define RTE_PTYPE_TUNNEL_IP                 0x00001000
+/**
+ * GRE (Generic Routing Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47>
+ */
+#define RTE_PTYPE_TUNNEL_GRE                0x00002000
+/**
+ * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=4798>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=4798>
+ */
+#define RTE_PTYPE_TUNNEL_VXLAN              0x00003000
+/**
+ * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
+ * packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47
+ * | 'protocol type'=0x6558>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47
+ * | 'protocol type'=0x6558'>
+ */
+#define RTE_PTYPE_TUNNEL_NVGRE              0x00004000
+/**
+ * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=6081>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=6081>
+ */
+#define RTE_PTYPE_TUNNEL_GENEVE             0x00005000
+/**
+ * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
+ * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
+ * packet type, if they can not be recognized independently as of hardware
+ * capability.
+ */
+#define RTE_PTYPE_TUNNEL_GRENAT             0x00006000
+/**
+ * Mask of tunneling packet types.
+ */
+#define RTE_PTYPE_TUNNEL_MASK               0x0000f000
+/**
+ * Ethernet packet type.
+ * It is used for inner packet type only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER            0x00010000
+/**
+ * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_VLAN       0x00020000
+/**
+ * Mask of inner layer 2 packet types.
+ */
+#define RTE_PTYPE_INNER_L2_MASK             0x000f0000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and does not contain any header option.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4             0x00100000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and contains header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x00200000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and does not contain any extension header.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6             0x00300000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and may or maynot contain header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and contains extension headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   'extension headers'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x00500000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and may or maynot contain extension
+ * headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   <'extension headers'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+/**
+ * Mask of inner layer 3 packet types.
+ */
+#define RTE_PTYPE_INNER_L3_MASK             0x00f00000
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_INNER_L4_TCP              0x01000000
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_INNER_L4_UDP              0x02000000
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or maynot have layer 4 packet.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_INNER_L4_FRAG             0x03000000
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_INNER_L4_SCTP             0x04000000
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_INNER_L4_ICMP             0x05000000
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or maynot have other unknown layer
+ * 4 packet types.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_INNER_L4_NONFRAG          0x06000000
+/**
+ * Mask of inner layer 4 packet types.
+ */
+#define RTE_PTYPE_INNER_L4_MASK             0x0f000000
+
+/**
+ * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
+ * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
+ * determine if it is an IPV4 packet.
+ */
+#define  RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
+
+/**
+ * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
+ * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
+ * determine if it is an IPV4 packet.
+ */
+#define  RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
+
+/* Check if it is a tunneling packet */
+#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \
+                                                 RTE_PTYPE_INNER_L2_MASK | \
+                                                 RTE_PTYPE_INNER_L3_MASK | \
+                                                 RTE_PTYPE_INNER_L4_MASK))
+
+/** Alignment constraint of mbuf private area. */
+#define RTE_MBUF_PRIV_ALIGN 8
+
+/**
+ * Get the name of a RX offload flag
+ *
+ * @param mask
+ *   The mask describing the flag.
+ * @return
+ *   The name of this flag, or NULL if it's not a valid RX flag.
+ */
+const char *rte_get_rx_ol_flag_name(uint64_t mask);
+
+/**
+ * Get the name of a TX offload flag
+ *
+ * @param mask
+ *   The mask describing the flag. Usually only one bit must be set.
+ *   Several bits can be given if they belong to the same mask.
+ *   Ex: PKT_TX_L4_MASK.
+ * @return
+ *   The name of this flag, or NULL if it's not a valid TX flag.
+ */
+const char *rte_get_tx_ol_flag_name(uint64_t mask);
+
+/**
+ * Some NICs need at least 2KB buffer to RX standard Ethernet frame without
+ * splitting it into multiple segments.
+ * So, for mbufs that planned to be involved into RX/TX, the recommended
+ * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM.
+ */
+#define	RTE_MBUF_DEFAULT_DATAROOM	2048
+#define	RTE_MBUF_DEFAULT_BUF_SIZE	\
+	(RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM)
+
+/* define a set of marker types that can be used to refer to set points in the
+ * mbuf */
+typedef void    *MARKER[0];   /**< generic marker for a point in a structure */
+typedef uint8_t  MARKER8[0];  /**< generic marker with 1B alignment */
+typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
+                               * with a single assignment */
+
+/**
+ * The generic rte_mbuf, containing a packet mbuf.
+ */
+struct rte_mbuf {
+	MARKER cacheline0;
+
+	void *buf_addr;           /**< Virtual address of segment buffer. */
+	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */
+
+	uint16_t buf_len;         /**< Length of segment buffer. */
+
+	/* next 6 bytes are initialised on RX descriptor rearm */
+	MARKER8 rearm_data;
+	uint16_t data_off;
+
+	/**
+	 * 16-bit Reference counter.
+	 * It should only be accessed using the following functions:
+	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
+	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
+	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
+	 * config option.
+	 */
+	union {
+		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
+		uint16_t refcnt;              /**< Non-atomically accessed refcnt */
+	};
+	uint8_t nb_segs;          /**< Number of segments. */
+	uint8_t port;             /**< Input port. */
+
+	uint64_t ol_flags;        /**< Offload features. */
+
+	/* remaining bytes are set on RX when pulling packet from descriptor */
+	MARKER rx_descriptor_fields1;
+
+	/*
+	 * The packet type, which is the combination of outer/inner L2, L3, L4
+	 * and tunnel types.
+	 */
+	union {
+		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
+		struct {
+			uint32_t l2_type:4; /**< (Outer) L2 type. */
+			uint32_t l3_type:4; /**< (Outer) L3 type. */
+			uint32_t l4_type:4; /**< (Outer) L4 type. */
+			uint32_t tun_type:4; /**< Tunnel type. */
+			uint32_t inner_l2_type:4; /**< Inner L2 type. */
+			uint32_t inner_l3_type:4; /**< Inner L3 type. */
+			uint32_t inner_l4_type:4; /**< Inner L4 type. */
+		};
+	};
+
+	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
+	uint16_t data_len;        /**< Amount of data in segment buffer. */
+	uint16_t vlan_tci;        /**< VLAN Tag Control Identifier (CPU order) */
+
+	union {
+		uint32_t rss;     /**< RSS hash result if RSS enabled */
+		struct {
+			union {
+				struct {
+					uint16_t hash;
+					uint16_t id;
+				};
+				uint32_t lo;
+				/**< Second 4 flexible bytes */
+			};
+			uint32_t hi;
+			/**< First 4 flexible bytes or FD ID, dependent on
+			     PKT_RX_FDIR_* flag in ol_flags. */
+		} fdir;           /**< Filter identifier if FDIR enabled */
+		struct {
+			uint32_t lo;
+			uint32_t hi;
+		} sched;          /**< Hierarchical scheduler */
+		uint32_t usr;	  /**< User defined tags. See rte_distributor_process() */
+	} hash;                   /**< hash information */
+
+	uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
+
+	uint16_t vlan_tci_outer;  /**< Outer VLAN Tag Control Identifier (CPU order) */
+
+	/* second cache line - fields only used in slow path or on TX */
+	MARKER cacheline1 __rte_cache_min_aligned;
+
+	union {
+		void *userdata;   /**< Can be used for external metadata */
+		uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
+	};
+
+	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
+	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
+
+	/* fields to support TX offloads */
+	union {
+		uint64_t tx_offload;       /**< combined for easy fetch */
+		struct {
+			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+			uint64_t tso_segsz:16; /**< TCP TSO segment size */
+
+			/* fields for TX offloading of tunnels */
+			uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
+			uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */
+
+			/* uint64_t unused:8; */
+		};
+	};
+
+	/** Size of the application private data. In case of an indirect
+	 * mbuf, it stores the direct mbuf private data size. */
+	uint16_t priv_size;
+
+	/** Timesync flags for use with IEEE1588. */
+	uint16_t timesync;
+} __rte_cache_aligned;
+
+static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
+
+/**
+ * Return the DMA address of the beginning of the mbuf data
+ *
+ * @param mb
+ *   The pointer to the mbuf.
+ * @return
+ *   The physical address of the beginning of the mbuf data
+ */
+static inline phys_addr_t
+rte_mbuf_data_dma_addr(const struct rte_mbuf *mb)
+{
+	return mb->buf_physaddr + mb->data_off;
+}
+
+/**
+ * Return the default DMA address of the beginning of the mbuf data
+ *
+ * This function is used by drivers in their receive function, as it
+ * returns the location where data should be written by the NIC, taking
+ * the default headroom in account.
+ *
+ * @param mb
+ *   The pointer to the mbuf.
+ * @return
+ *   The physical address of the beginning of the mbuf data
+ */
+static inline phys_addr_t
+rte_mbuf_data_dma_addr_default(const struct rte_mbuf *mb)
+{
+	return mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+}
+
+/**
+ * Return the mbuf owning the data buffer address of an indirect mbuf.
+ *
+ * @param mi
+ *   The pointer to the indirect mbuf.
+ * @return
+ *   The address of the direct mbuf corresponding to buffer_addr.
+ */
+static inline struct rte_mbuf *
+rte_mbuf_from_indirect(struct rte_mbuf *mi)
+{
+	return (struct rte_mbuf *)RTE_PTR_SUB(mi->buf_addr, sizeof(*mi) + mi->priv_size);
+}
+
+/**
+ * Return the buffer address embedded in the given mbuf.
+ *
+ * @param md
+ *   The pointer to the mbuf.
+ * @return
+ *   The address of the data buffer owned by the mbuf.
+ */
+static inline char *
+rte_mbuf_to_baddr(struct rte_mbuf *md)
+{
+	char *buffer_addr;
+	buffer_addr = (char *)md + sizeof(*md) + rte_pktmbuf_priv_size(md->pool);
+	return buffer_addr;
+}
+
+/**
+ * Returns TRUE if given mbuf is indirect, or FALSE otherwise.
+ */
+#define RTE_MBUF_INDIRECT(mb)   ((mb)->ol_flags & IND_ATTACHED_MBUF)
+
+/**
+ * Returns TRUE if given mbuf is direct, or FALSE otherwise.
+ */
+#define RTE_MBUF_DIRECT(mb)     (!RTE_MBUF_INDIRECT(mb))
+
+/**
+ * Private data in case of pktmbuf pool.
+ *
+ * A structure that contains some pktmbuf_pool-specific data that are
+ * appended after the mempool structure (in private data).
+ */
+struct rte_pktmbuf_pool_private {
+	uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
+	uint16_t mbuf_priv_size;      /**< Size of private area in each mbuf. */
+};
+
+#ifdef RTE_LIBRTE_MBUF_DEBUG
+
+/**  check mbuf type in debug mode */
+#define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h)
+
+/**  check mbuf type in debug mode if mbuf pointer is not null */
+#define __rte_mbuf_sanity_check_raw(m, is_h)	do {       \
+	if ((m) != NULL)                                   \
+		rte_mbuf_sanity_check(m, is_h);          \
+} while (0)
+
+/**  MBUF asserts in debug mode */
+#define RTE_MBUF_ASSERT(exp)                                         \
+if (!(exp)) {                                                        \
+	rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
+}
+
+#else /*  RTE_LIBRTE_MBUF_DEBUG */
+
+/**  check mbuf type in debug mode */
+#define __rte_mbuf_sanity_check(m, is_h) do { } while (0)
+
+/**  check mbuf type in debug mode if mbuf pointer is not null */
+#define __rte_mbuf_sanity_check_raw(m, is_h) do { } while (0)
+
+/**  MBUF asserts in debug mode */
+#define RTE_MBUF_ASSERT(exp)                do { } while (0)
+
+#endif /*  RTE_LIBRTE_MBUF_DEBUG */
+
+#ifdef RTE_MBUF_REFCNT_ATOMIC
+
+/**
+ * Reads the value of an mbuf's refcnt.
+ * @param m
+ *   Mbuf to read
+ * @return
+ *   Reference count number.
+ */
+static inline uint16_t
+rte_mbuf_refcnt_read(const struct rte_mbuf *m)
+{
+	return (uint16_t)(rte_atomic16_read(&m->refcnt_atomic));
+}
+
+/**
+ * Sets an mbuf's refcnt to a defined value.
+ * @param m
+ *   Mbuf to update
+ * @param new_value
+ *   Value set
+ */
+static inline void
+rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
+{
+	rte_atomic16_set(&m->refcnt_atomic, new_value);
+}
+
+/**
+ * Adds given value to an mbuf's refcnt and returns its new value.
+ * @param m
+ *   Mbuf to update
+ * @param value
+ *   Value to add/subtract
+ * @return
+ *   Updated value
+ */
+static inline uint16_t
+rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
+{
+	/*
+	 * The atomic_add is an expensive operation, so we don't want to
+	 * call it in the case where we know we are the uniq holder of
+	 * this mbuf (i.e. ref_cnt == 1). Otherwise, an atomic
+	 * operation has to be used because concurrent accesses on the
+	 * reference counter can occur.
+	 */
+	if (likely(rte_mbuf_refcnt_read(m) == 1)) {
+		rte_mbuf_refcnt_set(m, 1 + value);
+		return 1 + value;
+	}
+
+	return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value));
+}
+
+#else /* ! RTE_MBUF_REFCNT_ATOMIC */
+
+/**
+ * Adds given value to an mbuf's refcnt and returns its new value.
+ */
+static inline uint16_t
+rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
+{
+	m->refcnt = (uint16_t)(m->refcnt + value);
+	return m->refcnt;
+}
+
+/**
+ * Reads the value of an mbuf's refcnt.
+ */
+static inline uint16_t
+rte_mbuf_refcnt_read(const struct rte_mbuf *m)
+{
+	return m->refcnt;
+}
+
+/**
+ * Sets an mbuf's refcnt to the defined value.
+ */
+static inline void
+rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
+{
+	m->refcnt = new_value;
+}
+
+#endif /* RTE_MBUF_REFCNT_ATOMIC */
+
+/** Mbuf prefetch */
+#define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
+	if ((m) != NULL)                        \
+		rte_prefetch0(m);               \
+} while (0)
+
+
+/**
+ * Sanity checks on an mbuf.
+ *
+ * Check the consistency of the given mbuf. The function will cause a
+ * panic if corruption is detected.
+ *
+ * @param m
+ *   The mbuf to be checked.
+ * @param is_header
+ *   True if the mbuf is a packet header, false if it is a sub-segment
+ *   of a packet (in this case, some fields like nb_segs are not checked)
+ */
+void
+rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);
+
+/**
+ * @internal Allocate a new mbuf from mempool *mp*.
+ * The use of that function is reserved for RTE internal needs.
+ * Please use rte_pktmbuf_alloc().
+ *
+ * @param mp
+ *   The mempool from which mbuf is allocated.
+ * @return
+ *   - The pointer to the new mbuf on success.
+ *   - NULL if allocation failed.
+ */
+static inline struct rte_mbuf *__rte_mbuf_raw_alloc(struct rte_mempool *mp)
+{
+	struct rte_mbuf *m;
+	void *mb = NULL;
+	if (rte_mempool_get(mp, &mb) < 0)
+		return NULL;
+	m = (struct rte_mbuf *)mb;
+	RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0);
+	rte_mbuf_refcnt_set(m, 1);
+	return m;
+}
+
+/**
+ * @internal Put mbuf back into its original mempool.
+ * The use of that function is reserved for RTE internal needs.
+ * Please use rte_pktmbuf_free().
+ *
+ * @param m
+ *   The mbuf to be freed.
+ */
+static inline void __attribute__((always_inline))
+__rte_mbuf_raw_free(struct rte_mbuf *m)
+{
+	RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0);
+	rte_mempool_put(m->pool, m);
+}
+
+/* Operations on ctrl mbuf */
+
+/**
+ * The control mbuf constructor.
+ *
+ * This function initializes some fields in an mbuf structure that are
+ * not modified by the user once created (mbuf type, origin pool, buffer
+ * start address, and so on). This function is given as a callback function
+ * to rte_mempool_create() at pool creation time.
+ *
+ * @param mp
+ *   The mempool from which the mbuf is allocated.
+ * @param opaque_arg
+ *   A pointer that can be used by the user to retrieve useful information
+ *   for mbuf initialization. This pointer comes from the ``init_arg``
+ *   parameter of rte_mempool_create().
+ * @param m
+ *   The mbuf to initialize.
+ * @param i
+ *   The index of the mbuf in the pool table.
+ */
+void rte_ctrlmbuf_init(struct rte_mempool *mp, void *opaque_arg,
+		void *m, unsigned i);
+
+/**
+ * Allocate a new mbuf (type is ctrl) from mempool *mp*.
+ *
+ * This new mbuf is initialized with data pointing to the beginning of
+ * buffer, and with a length of zero.
+ *
+ * @param mp
+ *   The mempool from which the mbuf is allocated.
+ * @return
+ *   - The pointer to the new mbuf on success.
+ *   - NULL if allocation failed.
+ */
+#define rte_ctrlmbuf_alloc(mp) rte_pktmbuf_alloc(mp)
+
+/**
+ * Free a control mbuf back into its original mempool.
+ *
+ * @param m
+ *   The control mbuf to be freed.
+ */
+#define rte_ctrlmbuf_free(m) rte_pktmbuf_free(m)
+
+/**
+ * A macro that returns the pointer to the carried data.
+ *
+ * The value that can be read or assigned.
+ *
+ * @param m
+ *   The control mbuf.
+ */
+#define rte_ctrlmbuf_data(m) ((char *)((m)->buf_addr) + (m)->data_off)
+
+/**
+ * A macro that returns the length of the carried data.
+ *
+ * The value that can be read or assigned.
+ *
+ * @param m
+ *   The control mbuf.
+ */
+#define rte_ctrlmbuf_len(m) rte_pktmbuf_data_len(m)
+
+/**
+ * Tests if an mbuf is a control mbuf
+ *
+ * @param m
+ *   The mbuf to be tested
+ * @return
+ *   - True (1) if the mbuf is a control mbuf
+ *   - False(0) otherwise
+ */
+static inline int
+rte_is_ctrlmbuf(struct rte_mbuf *m)
+{
+	return !!(m->ol_flags & CTRL_MBUF_FLAG);
+}
+
+/* Operations on pkt mbuf */
+
+/**
+ * The packet mbuf constructor.
+ *
+ * This function initializes some fields in the mbuf structure that are
+ * not modified by the user once created (origin pool, buffer start
+ * address, and so on). This function is given as a callback function to
+ * rte_mempool_create() at pool creation time.
+ *
+ * @param mp
+ *   The mempool from which mbufs originate.
+ * @param opaque_arg
+ *   A pointer that can be used by the user to retrieve useful information
+ *   for mbuf initialization. This pointer comes from the ``init_arg``
+ *   parameter of rte_mempool_create().
+ * @param m
+ *   The mbuf to initialize.
+ * @param i
+ *   The index of the mbuf in the pool table.
+ */
+void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg,
+		      void *m, unsigned i);
+
+
+/**
+ * A  packet mbuf pool constructor.
+ *
+ * This function initializes the mempool private data in the case of a
+ * pktmbuf pool. This private data is needed by the driver. The
+ * function is given as a callback function to rte_mempool_create() at
+ * pool creation. It can be extended by the user, for example, to
+ * provide another packet size.
+ *
+ * @param mp
+ *   The mempool from which mbufs originate.
+ * @param opaque_arg
+ *   A pointer that can be used by the user to retrieve useful information
+ *   for mbuf initialization. This pointer comes from the ``init_arg``
+ *   parameter of rte_mempool_create().
+ */
+void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg);
+
+/**
+ * Create a mbuf pool.
+ *
+ * This function creates and initializes a packet mbuf pool. It is
+ * a wrapper to rte_mempool_create() with the proper packet constructor
+ * and mempool constructor.
+ *
+ * @param name
+ *   The name of the mbuf pool.
+ * @param n
+ *   The number of elements in the mbuf pool. The optimum size (in terms
+ *   of memory usage) for a mempool is when n is a power of two minus one:
+ *   n = (2^q - 1).
+ * @param cache_size
+ *   Size of the per-core object cache. See rte_mempool_create() for
+ *   details.
+ * @param priv_size
+ *   Size of application private are between the rte_mbuf structure
+ *   and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
+ * @param data_room_size
+ *   Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
+ * @param socket_id
+ *   The socket identifier where the memory should be allocated. The
+ *   value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
+ *   reserved zone.
+ * @return
+ *   The pointer to the new allocated mempool, on success. NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - cache size provided is too large, or priv_size is not aligned.
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_pktmbuf_pool_create(const char *name, unsigned n,
+	unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
+	int socket_id);
+
+/**
+ * Get the data room size of mbufs stored in a pktmbuf_pool
+ *
+ * The data room size is the amount of data that can be stored in a
+ * mbuf including the headroom (RTE_PKTMBUF_HEADROOM).
+ *
+ * @param mp
+ *   The packet mbuf pool.
+ * @return
+ *   The data room size of mbufs stored in this mempool.
+ */
+static inline uint16_t
+rte_pktmbuf_data_room_size(struct rte_mempool *mp)
+{
+	struct rte_pktmbuf_pool_private *mbp_priv;
+
+	mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
+	return mbp_priv->mbuf_data_room_size;
+}
+
+/**
+ * Get the application private size of mbufs stored in a pktmbuf_pool
+ *
+ * The private size of mbuf is a zone located between the rte_mbuf
+ * structure and the data buffer where an application can store data
+ * associated to a packet.
+ *
+ * @param mp
+ *   The packet mbuf pool.
+ * @return
+ *   The private size of mbufs stored in this mempool.
+ */
+static inline uint16_t
+rte_pktmbuf_priv_size(struct rte_mempool *mp)
+{
+	struct rte_pktmbuf_pool_private *mbp_priv;
+
+	mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
+	return mbp_priv->mbuf_priv_size;
+}
+
+/**
+ * Reset the fields of a packet mbuf to their default values.
+ *
+ * The given mbuf must have only one segment.
+ *
+ * @param m
+ *   The packet mbuf to be resetted.
+ */
+static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
+{
+	m->next = NULL;
+	m->pkt_len = 0;
+	m->tx_offload = 0;
+	m->vlan_tci = 0;
+	m->vlan_tci_outer = 0;
+	m->nb_segs = 1;
+	m->port = 0xff;
+
+	m->ol_flags = 0;
+	m->packet_type = 0;
+	m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
+			RTE_PKTMBUF_HEADROOM : m->buf_len;
+
+	m->data_len = 0;
+	__rte_mbuf_sanity_check(m, 1);
+}
+
+/**
+ * Allocate a new mbuf from a mempool.
+ *
+ * This new mbuf contains one segment, which has a length of 0. The pointer
+ * to data is initialized to have some bytes of headroom in the buffer
+ * (if buffer size allows).
+ *
+ * @param mp
+ *   The mempool from which the mbuf is allocated.
+ * @return
+ *   - The pointer to the new mbuf on success.
+ *   - NULL if allocation failed.
+ */
+static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)
+{
+	struct rte_mbuf *m;
+	if ((m = __rte_mbuf_raw_alloc(mp)) != NULL)
+		rte_pktmbuf_reset(m);
+	return m;
+}
+
+/**
+ * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
+ * values.
+ *
+ *  @param pool
+ *    The mempool from which mbufs are allocated.
+ *  @param mbufs
+ *    Array of pointers to mbufs
+ *  @param count
+ *    Array size
+ *  @return
+ *   - 0: Success
+ */
+static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
+	 struct rte_mbuf **mbufs, unsigned count)
+{
+	unsigned idx = 0;
+	int rc;
+
+	rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
+	if (unlikely(rc))
+		return rc;
+
+	/* To understand duff's device on loop unwinding optimization, see
+	 * https://en.wikipedia.org/wiki/Duff's_device.
+	 * Here while() loop is used rather than do() while{} to avoid extra
+	 * check if count is zero.
+	 */
+	switch (count % 4) {
+	case 0:
+		while (idx != count) {
+			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			rte_mbuf_refcnt_set(mbufs[idx], 1);
+			rte_pktmbuf_reset(mbufs[idx]);
+			idx++;
+	case 3:
+			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			rte_mbuf_refcnt_set(mbufs[idx], 1);
+			rte_pktmbuf_reset(mbufs[idx]);
+			idx++;
+	case 2:
+			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			rte_mbuf_refcnt_set(mbufs[idx], 1);
+			rte_pktmbuf_reset(mbufs[idx]);
+			idx++;
+	case 1:
+			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			rte_mbuf_refcnt_set(mbufs[idx], 1);
+			rte_pktmbuf_reset(mbufs[idx]);
+			idx++;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Attach packet mbuf to another packet mbuf.
+ *
+ * After attachment we refer the mbuf we attached as 'indirect',
+ * while mbuf we attached to as 'direct'.
+ * Right now, not supported:
+ *  - attachment for already indirect mbuf (e.g. - mi has to be direct).
+ *  - mbuf we trying to attach (mi) is used by someone else
+ *    e.g. it's reference counter is greater then 1.
+ *
+ * @param mi
+ *   The indirect packet mbuf.
+ * @param m
+ *   The packet mbuf we're attaching to.
+ */
+static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
+{
+	struct rte_mbuf *md;
+
+	RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(mi) &&
+	    rte_mbuf_refcnt_read(mi) == 1);
+
+	/* if m is not direct, get the mbuf that embeds the data */
+	if (RTE_MBUF_DIRECT(m))
+		md = m;
+	else
+		md = rte_mbuf_from_indirect(m);
+
+	rte_mbuf_refcnt_update(md, 1);
+	mi->priv_size = m->priv_size;
+	mi->buf_physaddr = m->buf_physaddr;
+	mi->buf_addr = m->buf_addr;
+	mi->buf_len = m->buf_len;
+
+	mi->next = m->next;
+	mi->data_off = m->data_off;
+	mi->data_len = m->data_len;
+	mi->port = m->port;
+	mi->vlan_tci = m->vlan_tci;
+	mi->vlan_tci_outer = m->vlan_tci_outer;
+	mi->tx_offload = m->tx_offload;
+	mi->hash = m->hash;
+
+	mi->next = NULL;
+	mi->pkt_len = mi->data_len;
+	mi->nb_segs = 1;
+	mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
+	mi->packet_type = m->packet_type;
+
+	__rte_mbuf_sanity_check(mi, 1);
+	__rte_mbuf_sanity_check(m, 0);
+}
+
+/**
+ * Detach an indirect packet mbuf.
+ *
+ *  - restore original mbuf address and length values.
+ *  - reset pktmbuf data and data_len to their default values.
+ *  All other fields of the given packet mbuf will be left intact.
+ *
+ * @param m
+ *   The indirect attached packet mbuf.
+ */
+static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
+{
+	struct rte_mempool *mp = m->pool;
+	uint32_t mbuf_size, buf_len, priv_size;
+
+	priv_size = rte_pktmbuf_priv_size(mp);
+	mbuf_size = sizeof(struct rte_mbuf) + priv_size;
+	buf_len = rte_pktmbuf_data_room_size(mp);
+
+	m->priv_size = priv_size;
+	m->buf_addr = (char *)m + mbuf_size;
+	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
+	m->buf_len = (uint16_t)buf_len;
+	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+	m->data_len = 0;
+	m->ol_flags = 0;
+}
+
+static inline struct rte_mbuf* __attribute__((always_inline))
+__rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
+{
+	__rte_mbuf_sanity_check(m, 0);
+
+	if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) {
+
+		/* if this is an indirect mbuf, then
+		 *  - detach mbuf
+		 *  - free attached mbuf segment
+		 */
+		if (RTE_MBUF_INDIRECT(m)) {
+			struct rte_mbuf *md = rte_mbuf_from_indirect(m);
+			rte_pktmbuf_detach(m);
+			if (rte_mbuf_refcnt_update(md, -1) == 0)
+				__rte_mbuf_raw_free(md);
+		}
+		return m;
+	}
+	return NULL;
+}
+
+/**
+ * Free a segment of a packet mbuf into its original mempool.
+ *
+ * Free an mbuf, without parsing other segments in case of chained
+ * buffers.
+ *
+ * @param m
+ *   The packet mbuf segment to be freed.
+ */
+static inline void __attribute__((always_inline))
+rte_pktmbuf_free_seg(struct rte_mbuf *m)
+{
+	if (likely(NULL != (m = __rte_pktmbuf_prefree_seg(m)))) {
+		m->next = NULL;
+		__rte_mbuf_raw_free(m);
+	}
+}
+
+/**
+ * Free a packet mbuf back into its original mempool.
+ *
+ * Free an mbuf, and all its segments in case of chained buffers. Each
+ * segment is added back into its original mempool.
+ *
+ * @param m
+ *   The packet mbuf to be freed.
+ */
+static inline void rte_pktmbuf_free(struct rte_mbuf *m)
+{
+	struct rte_mbuf *m_next;
+
+	__rte_mbuf_sanity_check(m, 1);
+
+	while (m != NULL) {
+		m_next = m->next;
+		rte_pktmbuf_free_seg(m);
+		m = m_next;
+	}
+}
+
+/**
+ * Creates a "clone" of the given packet mbuf.
+ *
+ * Walks through all segments of the given packet mbuf, and for each of them:
+ *  - Creates a new packet mbuf from the given pool.
+ *  - Attaches newly created mbuf to the segment.
+ * Then updates pkt_len and nb_segs of the "clone" packet mbuf to match values
+ * from the original packet mbuf.
+ *
+ * @param md
+ *   The packet mbuf to be cloned.
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @return
+ *   - The pointer to the new "clone" mbuf on success.
+ *   - NULL if allocation fails.
+ */
+static inline struct rte_mbuf *rte_pktmbuf_clone(struct rte_mbuf *md,
+		struct rte_mempool *mp)
+{
+	struct rte_mbuf *mc, *mi, **prev;
+	uint32_t pktlen;
+	uint8_t nseg;
+
+	if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL))
+		return NULL;
+
+	mi = mc;
+	prev = &mi->next;
+	pktlen = md->pkt_len;
+	nseg = 0;
+
+	do {
+		nseg++;
+		rte_pktmbuf_attach(mi, md);
+		*prev = mi;
+		prev = &mi->next;
+	} while ((md = md->next) != NULL &&
+	    (mi = rte_pktmbuf_alloc(mp)) != NULL);
+
+	*prev = NULL;
+	mc->nb_segs = nseg;
+	mc->pkt_len = pktlen;
+
+	/* Allocation of new indirect segment failed */
+	if (unlikely (mi == NULL)) {
+		rte_pktmbuf_free(mc);
+		return NULL;
+	}
+
+	__rte_mbuf_sanity_check(mc, 1);
+	return mc;
+}
+
+/**
+ * Adds given value to the refcnt of all packet mbuf segments.
+ *
+ * Walks through all segments of given packet mbuf and for each of them
+ * invokes rte_mbuf_refcnt_update().
+ *
+ * @param m
+ *   The packet mbuf whose refcnt to be updated.
+ * @param v
+ *   The value to add to the mbuf's segments refcnt.
+ */
+static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v)
+{
+	__rte_mbuf_sanity_check(m, 1);
+
+	do {
+		rte_mbuf_refcnt_update(m, v);
+	} while ((m = m->next) != NULL);
+}
+
+/**
+ * Get the headroom in a packet mbuf.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   The length of the headroom.
+ */
+static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m)
+{
+	__rte_mbuf_sanity_check(m, 1);
+	return m->data_off;
+}
+
+/**
+ * Get the tailroom of a packet mbuf.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   The length of the tailroom.
+ */
+static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m)
+{
+	__rte_mbuf_sanity_check(m, 1);
+	return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) -
+			  m->data_len);
+}
+
+/**
+ * Get the last segment of the packet.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   The last segment of the given mbuf.
+ */
+static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m)
+{
+	struct rte_mbuf *m2 = (struct rte_mbuf *)m;
+
+	__rte_mbuf_sanity_check(m, 1);
+	while (m2->next != NULL)
+		m2 = m2->next;
+	return m2;
+}
+
+/**
+ * A macro that points to an offset into the data in the mbuf.
+ *
+ * The returned pointer is cast to type t. Before using this
+ * function, the user must ensure that the first segment is large
+ * enough to accommodate its data.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @param o
+ *   The offset into the mbuf data.
+ * @param t
+ *   The type to cast the result into.
+ */
+#define rte_pktmbuf_mtod_offset(m, t, o)	\
+	((t)((char *)(m)->buf_addr + (m)->data_off + (o)))
+
+/**
+ * A macro that points to the start of the data in the mbuf.
+ *
+ * The returned pointer is cast to type t. Before using this
+ * function, the user must ensure that the first segment is large
+ * enough to accommodate its data.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @param t
+ *   The type to cast the result into.
+ */
+#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
+
+/**
+ * A macro that returns the physical address that points to an offset of the
+ * start of the data in the mbuf
+ *
+ * @param m
+ *   The packet mbuf.
+ * @param o
+ *   The offset into the data to calculate address from.
+ */
+#define rte_pktmbuf_mtophys_offset(m, o) \
+	(phys_addr_t)((m)->buf_physaddr + (m)->data_off + (o))
+
+/**
+ * A macro that returns the physical address that points to the start of the
+ * data in the mbuf
+ *
+ * @param m
+ *   The packet mbuf.
+ */
+#define rte_pktmbuf_mtophys(m) rte_pktmbuf_mtophys_offset(m, 0)
+
+/**
+ * A macro that returns the length of the packet.
+ *
+ * The value can be read or assigned.
+ *
+ * @param m
+ *   The packet mbuf.
+ */
+#define rte_pktmbuf_pkt_len(m) ((m)->pkt_len)
+
+/**
+ * A macro that returns the length of the segment.
+ *
+ * The value can be read or assigned.
+ *
+ * @param m
+ *   The packet mbuf.
+ */
+#define rte_pktmbuf_data_len(m) ((m)->data_len)
+
+/**
+ * Prepend len bytes to an mbuf data area.
+ *
+ * Returns a pointer to the new
+ * data start address. If there is not enough headroom in the first
+ * segment, the function will return NULL, without modifying the mbuf.
+ *
+ * @param m
+ *   The pkt mbuf.
+ * @param len
+ *   The amount of data to prepend (in bytes).
+ * @return
+ *   A pointer to the start of the newly prepended data, or
+ *   NULL if there is not enough headroom space in the first segment
+ */
+static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m,
+					uint16_t len)
+{
+	__rte_mbuf_sanity_check(m, 1);
+
+	if (unlikely(len > rte_pktmbuf_headroom(m)))
+		return NULL;
+
+	m->data_off -= len;
+	m->data_len = (uint16_t)(m->data_len + len);
+	m->pkt_len  = (m->pkt_len + len);
+
+	return (char *)m->buf_addr + m->data_off;
+}
+
+/**
+ * Append len bytes to an mbuf.
+ *
+ * Append len bytes to an mbuf and return a pointer to the start address
+ * of the added data. If there is not enough tailroom in the last
+ * segment, the function will return NULL, without modifying the mbuf.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @param len
+ *   The amount of data to append (in bytes).
+ * @return
+ *   A pointer to the start of the newly appended data, or
+ *   NULL if there is not enough tailroom space in the last segment
+ */
+static inline char *rte_pktmbuf_append(struct rte_mbuf *m, uint16_t len)
+{
+	void *tail;
+	struct rte_mbuf *m_last;
+
+	__rte_mbuf_sanity_check(m, 1);
+
+	m_last = rte_pktmbuf_lastseg(m);
+	if (unlikely(len > rte_pktmbuf_tailroom(m_last)))
+		return NULL;
+
+	tail = (char *)m_last->buf_addr + m_last->data_off + m_last->data_len;
+	m_last->data_len = (uint16_t)(m_last->data_len + len);
+	m->pkt_len  = (m->pkt_len + len);
+	return (char*) tail;
+}
+
+/**
+ * Remove len bytes at the beginning of an mbuf.
+ *
+ * Returns a pointer to the start address of the new data area. If the
+ * length is greater than the length of the first segment, then the
+ * function will fail and return NULL, without modifying the mbuf.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @param len
+ *   The amount of data to remove (in bytes).
+ * @return
+ *   A pointer to the new start of the data.
+ */
+static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len)
+{
+	__rte_mbuf_sanity_check(m, 1);
+
+	if (unlikely(len > m->data_len))
+		return NULL;
+
+	m->data_len = (uint16_t)(m->data_len - len);
+	m->data_off += len;
+	m->pkt_len  = (m->pkt_len - len);
+	return (char *)m->buf_addr + m->data_off;
+}
+
+/**
+ * Remove len bytes of data at the end of the mbuf.
+ *
+ * If the length is greater than the length of the last segment, the
+ * function will fail and return -1 without modifying the mbuf.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @param len
+ *   The amount of data to remove (in bytes).
+ * @return
+ *   - 0: On success.
+ *   - -1: On error.
+ */
+static inline int rte_pktmbuf_trim(struct rte_mbuf *m, uint16_t len)
+{
+	struct rte_mbuf *m_last;
+
+	__rte_mbuf_sanity_check(m, 1);
+
+	m_last = rte_pktmbuf_lastseg(m);
+	if (unlikely(len > m_last->data_len))
+		return -1;
+
+	m_last->data_len = (uint16_t)(m_last->data_len - len);
+	m->pkt_len  = (m->pkt_len - len);
+	return 0;
+}
+
+/**
+ * Test if mbuf data is contiguous.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   - 1, if all data is contiguous (one segment).
+ *   - 0, if there is several segments.
+ */
+static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
+{
+	__rte_mbuf_sanity_check(m, 1);
+	return !!(m->nb_segs == 1);
+}
+
+/**
+ * Chain an mbuf to another, thereby creating a segmented packet.
+ *
+ * Note: The implementation will do a linear walk over the segments to find
+ * the tail entry. For cases when there are many segments, it's better to
+ * chain the entries manually.
+ *
+ * @param head
+ *   The head of the mbuf chain (the first packet)
+ * @param tail
+ *   The mbuf to put last in the chain
+ *
+ * @return
+ *   - 0, on success.
+ *   - -EOVERFLOW, if the chain is full (256 entries)
+ */
+static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail)
+{
+	struct rte_mbuf *cur_tail;
+
+	/* Check for number-of-segments-overflow */
+	if (head->nb_segs + tail->nb_segs >= 1 << (sizeof(head->nb_segs) * 8))
+		return -EOVERFLOW;
+
+	/* Chain 'tail' onto the old tail */
+	cur_tail = rte_pktmbuf_lastseg(head);
+	cur_tail->next = tail;
+
+	/* accumulate number of segments and total length. */
+	head->nb_segs = (uint8_t)(head->nb_segs + tail->nb_segs);
+	head->pkt_len += tail->pkt_len;
+
+	/* pkt_len is only set in the head */
+	tail->pkt_len = tail->data_len;
+
+	return 0;
+}
+
+/**
+ * Dump an mbuf structure to the console.
+ *
+ * Dump all fields for the given packet mbuf and all its associated
+ * segments (in the case of a chained buffer).
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @param m
+ *   The packet mbuf.
+ * @param dump_len
+ *   If dump_len != 0, also dump the "dump_len" first data bytes of
+ *   the packet.
+ */
+void rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MBUF_H_ */
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
new file mode 100644
index 00000000..e10f6bdc
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -0,0 +1,20 @@
+DPDK_2.0 {
+	global:
+
+	rte_ctrlmbuf_init;
+	rte_get_rx_ol_flag_name;
+	rte_get_tx_ol_flag_name;
+	rte_mbuf_sanity_check;
+	rte_pktmbuf_dump;
+	rte_pktmbuf_init;
+	rte_pktmbuf_pool_init;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_pktmbuf_pool_create;
+
+} DPDK_2.0;
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
new file mode 100644
index 00000000..a6898eff
--- /dev/null
+++ b/lib/librte_mempool/Makefile
@@ -0,0 +1,53 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_mempool.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_mempool_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool.c
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_dom0_mempool.c
+endif
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
+
+DEPDIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += lib/librte_eal lib/librte_ring
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c
new file mode 100644
index 00000000..0d6d7504
--- /dev/null
+++ b/lib/librte_mempool/rte_dom0_mempool.c
@@ -0,0 +1,133 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+
+#include "rte_mempool.h"
+
+static void
+get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num,
+	uint32_t pg_sz, uint32_t memseg_id)
+{
+	uint32_t i;
+	uint64_t virt_addr, mfn_id;
+	struct rte_mem_config *mcfg;
+	uint32_t page_size = getpagesize();
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+	virt_addr = (uintptr_t) mcfg->memseg[memseg_id].addr;
+
+	for (i = 0; i != pg_num; i++) {
+		mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
+		pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
+	}
+}
+
+/* create the mempool for supporting Dom0 */
+struct rte_mempool *
+rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size,
+	unsigned cache_size, unsigned private_data_size,
+	rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+	int socket_id, unsigned flags)
+{
+	struct rte_mempool *mp = NULL;
+	phys_addr_t *pa;
+	char *va;
+	size_t sz;
+	uint32_t pg_num, pg_shift, pg_sz, total_size;
+	const struct rte_memzone *mz;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+
+	pg_sz = RTE_PGSIZE_2M;
+
+	pg_shift = rte_bsf32(pg_sz);
+	total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
+
+	/* calc max memory size and max number of pages needed. */
+	sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift) +
+		RTE_PGSIZE_2M;
+	pg_num = sz >> pg_shift;
+
+	/* extract physical mappings of the allocated memory. */
+	pa = calloc(pg_num, sizeof (*pa));
+	if (pa == NULL)
+		return mp;
+
+	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name);
+	mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags);
+	if (mz == NULL) {
+		free(pa);
+		return mp;
+	}
+
+	va = (char *)RTE_ALIGN_CEIL((uintptr_t)mz->addr, RTE_PGSIZE_2M);
+	/* extract physical mappings of the allocated memory. */
+	get_phys_map(va, pa, pg_num, pg_sz, mz->memseg_id);
+
+	mp = rte_mempool_xmem_create(name, elt_num, elt_size,
+		cache_size, private_data_size,
+		mp_init, mp_init_arg,
+		obj_init, obj_init_arg,
+		socket_id, flags, va, pa, pg_num, pg_shift);
+
+	free(pa);
+
+	return mp;
+}
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
new file mode 100644
index 00000000..f8781e17
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool.c
@@ -0,0 +1,919 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+
+#include "rte_mempool.h"
+
+TAILQ_HEAD(rte_mempool_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_mempool_tailq = {
+	.name = "RTE_MEMPOOL",
+};
+EAL_REGISTER_TAILQ(rte_mempool_tailq)
+
+#define CACHE_FLUSHTHRESH_MULTIPLIER 1.5
+#define CALC_CACHE_FLUSHTHRESH(c)	\
+	((typeof(c))((c) * CACHE_FLUSHTHRESH_MULTIPLIER))
+
+/*
+ * return the greatest common divisor between a and b (fast algorithm)
+ *
+ */
+static unsigned get_gcd(unsigned a, unsigned b)
+{
+	unsigned c;
+
+	if (0 == a)
+		return b;
+	if (0 == b)
+		return a;
+
+	if (a < b) {
+		c = a;
+		a = b;
+		b = c;
+	}
+
+	while (b != 0) {
+		c = a % b;
+		a = b;
+		b = c;
+	}
+
+	return a;
+}
+
+/*
+ * Depending on memory configuration, objects addresses are spread
+ * between channels and ranks in RAM: the pool allocator will add
+ * padding between objects. This function return the new size of the
+ * object.
+ */
+static unsigned optimize_object_size(unsigned obj_size)
+{
+	unsigned nrank, nchan;
+	unsigned new_obj_size;
+
+	/* get number of channels */
+	nchan = rte_memory_get_nchannel();
+	if (nchan == 0)
+		nchan = 4;
+
+	nrank = rte_memory_get_nrank();
+	if (nrank == 0)
+		nrank = 1;
+
+	/* process new object size */
+	new_obj_size = (obj_size + RTE_MEMPOOL_ALIGN_MASK) / RTE_MEMPOOL_ALIGN;
+	while (get_gcd(new_obj_size, nrank * nchan) != 1)
+		new_obj_size++;
+	return new_obj_size * RTE_MEMPOOL_ALIGN;
+}
+
+static void
+mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx,
+	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+{
+	struct rte_mempool_objhdr *hdr;
+	struct rte_mempool_objtlr *tlr __rte_unused;
+
+	obj = (char *)obj + mp->header_size;
+
+	/* set mempool ptr in header */
+	hdr = RTE_PTR_SUB(obj, sizeof(*hdr));
+	hdr->mp = mp;
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
+	tlr = __mempool_get_trailer(obj);
+	tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
+#endif
+	/* call the initializer */
+	if (obj_init)
+		obj_init(mp, obj_init_arg, obj, obj_idx);
+
+	/* enqueue in ring */
+	rte_ring_sp_enqueue(mp->ring, obj);
+}
+
+uint32_t
+rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+	rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg)
+{
+	uint32_t i, j, k;
+	uint32_t pgn, pgf;
+	uintptr_t end, start, va;
+	uintptr_t pg_sz;
+
+	pg_sz = (uintptr_t)1 << pg_shift;
+	va = (uintptr_t)vaddr;
+
+	i = 0;
+	j = 0;
+
+	while (i != elt_num && j != pg_num) {
+
+		start = RTE_ALIGN_CEIL(va, align);
+		end = start + elt_sz;
+
+		/* index of the first page for the next element. */
+		pgf = (end >> pg_shift) - (start >> pg_shift);
+
+		/* index of the last page for the current element. */
+		pgn = ((end - 1) >> pg_shift) - (start >> pg_shift);
+		pgn += j;
+
+		/* do we have enough space left for the element. */
+		if (pgn >= pg_num)
+			break;
+
+		for (k = j;
+				k != pgn &&
+				paddr[k] + pg_sz == paddr[k + 1];
+				k++)
+			;
+
+		/*
+		 * if next pgn chunks of memory physically continuous,
+		 * use it to create next element.
+		 * otherwise, just skip that chunk unused.
+		 */
+		if (k == pgn) {
+			if (obj_iter != NULL)
+				obj_iter(obj_iter_arg, (void *)start,
+					(void *)end, i);
+			va = end;
+			j += pgf;
+			i++;
+		} else {
+			va = RTE_ALIGN_CEIL((va + 1), pg_sz);
+			j++;
+		}
+	}
+
+	return i;
+}
+
+/*
+ * Populate  mempool with the objects.
+ */
+
+struct mempool_populate_arg {
+	struct rte_mempool     *mp;
+	rte_mempool_obj_ctor_t *obj_init;
+	void                   *obj_init_arg;
+};
+
+static void
+mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx)
+{
+	struct mempool_populate_arg *pa = arg;
+
+	mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg);
+	pa->mp->elt_va_end = (uintptr_t)end;
+}
+
+static void
+mempool_populate(struct rte_mempool *mp, size_t num, size_t align,
+	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+{
+	uint32_t elt_sz;
+	struct mempool_populate_arg arg;
+
+	elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
+	arg.mp = mp;
+	arg.obj_init = obj_init;
+	arg.obj_init_arg = obj_init_arg;
+
+	mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start,
+		num, elt_sz, align,
+		mp->elt_pa, mp->pg_num, mp->pg_shift,
+		mempool_obj_populate, &arg);
+}
+
+uint32_t
+rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
+	struct rte_mempool_objsz *sz)
+{
+	struct rte_mempool_objsz lsz;
+
+	sz = (sz != NULL) ? sz : &lsz;
+
+	/*
+	 * In header, we have at least the pointer to the pool, and
+	 * optionaly a 64 bits cookie.
+	 */
+	sz->header_size = 0;
+	sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	sz->header_size += sizeof(uint64_t); /* cookie */
+#endif
+	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
+		sz->header_size = RTE_ALIGN_CEIL(sz->header_size,
+			RTE_MEMPOOL_ALIGN);
+
+	/* trailer contains the cookie in debug mode */
+	sz->trailer_size = 0;
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	sz->trailer_size += sizeof(uint64_t); /* cookie */
+#endif
+	/* element size is 8 bytes-aligned at least */
+	sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t));
+
+	/* expand trailer to next cache line */
+	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
+		sz->total_size = sz->header_size + sz->elt_size +
+			sz->trailer_size;
+		sz->trailer_size += ((RTE_MEMPOOL_ALIGN -
+				  (sz->total_size & RTE_MEMPOOL_ALIGN_MASK)) &
+				 RTE_MEMPOOL_ALIGN_MASK);
+	}
+
+	/*
+	 * increase trailer to add padding between objects in order to
+	 * spread them across memory channels/ranks
+	 */
+	if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
+		unsigned new_size;
+		new_size = optimize_object_size(sz->header_size + sz->elt_size +
+			sz->trailer_size);
+		sz->trailer_size = new_size - sz->header_size - sz->elt_size;
+	}
+
+	if (! rte_eal_has_hugepages()) {
+		/*
+		 * compute trailer size so that pool elements fit exactly in
+		 * a standard page
+		 */
+		int page_size = getpagesize();
+		int new_size = page_size - sz->header_size - sz->elt_size;
+		if (new_size < 0 || (unsigned int)new_size < sz->trailer_size) {
+			printf("When hugepages are disabled, pool objects "
+			       "can't exceed PAGE_SIZE: %d + %d + %d > %d\n",
+			       sz->header_size, sz->elt_size, sz->trailer_size,
+			       page_size);
+			return 0;
+		}
+		sz->trailer_size = new_size;
+	}
+
+	/* this is the size of an object, including header and trailer */
+	sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size;
+
+	return sz->total_size;
+}
+
+
+/*
+ * Calculate maximum amount of memory required to store given number of objects.
+ */
+size_t
+rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift)
+{
+	size_t n, pg_num, pg_sz, sz;
+
+	pg_sz = (size_t)1 << pg_shift;
+
+	if ((n = pg_sz / elt_sz) > 0) {
+		pg_num = (elt_num + n - 1) / n;
+		sz = pg_num << pg_shift;
+	} else {
+		sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num;
+	}
+
+	return sz;
+}
+
+/*
+ * Calculate how much memory would be actually required with the
+ * given memory footprint to store required number of elements.
+ */
+static void
+mempool_lelem_iter(void *arg, __rte_unused void *start, void *end,
+	__rte_unused uint32_t idx)
+{
+	*(uintptr_t *)arg = (uintptr_t)end;
+}
+
+ssize_t
+rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+{
+	uint32_t n;
+	uintptr_t va, uv;
+	size_t pg_sz, usz;
+
+	pg_sz = (size_t)1 << pg_shift;
+	va = (uintptr_t)vaddr;
+	uv = va;
+
+	if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1,
+			paddr, pg_num, pg_shift, mempool_lelem_iter,
+			&uv)) != elt_num) {
+		return -(ssize_t)n;
+	}
+
+	uv = RTE_ALIGN_CEIL(uv, pg_sz);
+	usz = uv - va;
+	return usz;
+}
+
+#ifndef RTE_LIBRTE_XEN_DOM0
+/* stub if DOM0 support not configured */
+struct rte_mempool *
+rte_dom0_mempool_create(const char *name __rte_unused,
+			unsigned n __rte_unused,
+			unsigned elt_size __rte_unused,
+			unsigned cache_size __rte_unused,
+			unsigned private_data_size __rte_unused,
+			rte_mempool_ctor_t *mp_init __rte_unused,
+			void *mp_init_arg __rte_unused,
+			rte_mempool_obj_ctor_t *obj_init __rte_unused,
+			void *obj_init_arg __rte_unused,
+			int socket_id __rte_unused,
+			unsigned flags __rte_unused)
+{
+	rte_errno = EINVAL;
+	return NULL;
+}
+#endif
+
+/* create the mempool */
+struct rte_mempool *
+rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
+		   unsigned cache_size, unsigned private_data_size,
+		   rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+		   rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		   int socket_id, unsigned flags)
+{
+	if (rte_xen_dom0_supported())
+		return rte_dom0_mempool_create(name, n, elt_size,
+					       cache_size, private_data_size,
+					       mp_init, mp_init_arg,
+					       obj_init, obj_init_arg,
+					       socket_id, flags);
+	else
+		return rte_mempool_xmem_create(name, n, elt_size,
+					       cache_size, private_data_size,
+					       mp_init, mp_init_arg,
+					       obj_init, obj_init_arg,
+					       socket_id, flags,
+					       NULL, NULL, MEMPOOL_PG_NUM_DEFAULT,
+					       MEMPOOL_PG_SHIFT_MAX);
+}
+
+/*
+ * Create the mempool over already allocated chunk of memory.
+ * That external memory buffer can consists of physically disjoint pages.
+ * Setting vaddr to NULL, makes mempool to fallback to original behaviour
+ * and allocate space for mempool and it's elements as one big chunk of
+ * physically continuos memory.
+ * */
+struct rte_mempool *
+rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
+		unsigned cache_size, unsigned private_data_size,
+		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+		rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		int socket_id, unsigned flags, void *vaddr,
+		const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+{
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	char rg_name[RTE_RING_NAMESIZE];
+	struct rte_mempool_list *mempool_list;
+	struct rte_mempool *mp = NULL;
+	struct rte_tailq_entry *te = NULL;
+	struct rte_ring *r = NULL;
+	const struct rte_memzone *mz;
+	size_t mempool_size;
+	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+	int rg_flags = 0;
+	void *obj;
+	struct rte_mempool_objsz objsz;
+	void *startaddr;
+	int page_size = getpagesize();
+
+	/* compilation-time checks */
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
+			  RTE_CACHE_LINE_MASK) != 0);
+	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#endif
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
+			  RTE_CACHE_LINE_MASK) != 0);
+	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#endif
+
+	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+
+	/* asked cache too big */
+	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE ||
+	    CALC_CACHE_FLUSHTHRESH(cache_size) > n) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* check that we have both VA and PA */
+	if (vaddr != NULL && paddr == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* Check that pg_num and pg_shift parameters are valid. */
+	if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* "no cache align" imply "no spread" */
+	if (flags & MEMPOOL_F_NO_CACHE_ALIGN)
+		flags |= MEMPOOL_F_NO_SPREAD;
+
+	/* ring flags */
+	if (flags & MEMPOOL_F_SP_PUT)
+		rg_flags |= RING_F_SP_ENQ;
+	if (flags & MEMPOOL_F_SC_GET)
+		rg_flags |= RING_F_SC_DEQ;
+
+	/* calculate mempool object sizes. */
+	if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
+	/* allocate the ring that will be used to store objects */
+	/* Ring functions will return appropriate errors if we are
+	 * running as a secondary process etc., so no checks made
+	 * in this function for that condition */
+	snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT, name);
+	r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags);
+	if (r == NULL)
+		goto exit_unlock;
+
+	/*
+	 * reserve a memory zone for this mempool: private data is
+	 * cache-aligned
+	 */
+	private_data_size = (private_data_size +
+			     RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);
+
+	if (! rte_eal_has_hugepages()) {
+		/*
+		 * expand private data size to a whole page, so that the
+		 * first pool element will start on a new standard page
+		 */
+		int head = sizeof(struct rte_mempool);
+		int new_size = (private_data_size + head) % page_size;
+		if (new_size) {
+			private_data_size += page_size - new_size;
+		}
+	}
+
+	/* try to allocate tailq entry */
+	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
+		goto exit_unlock;
+	}
+
+	/*
+	 * If user provided an external memory buffer, then use it to
+	 * store mempool objects. Otherwise reserve a memzone that is large
+	 * enough to hold mempool header and metadata plus mempool objects.
+	 */
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
+	if (vaddr == NULL)
+		mempool_size += (size_t)objsz.total_size * n;
+
+	if (! rte_eal_has_hugepages()) {
+		/*
+		 * we want the memory pool to start on a page boundary,
+		 * because pool elements crossing page boundaries would
+		 * result in discontiguous physical addresses
+		 */
+		mempool_size += page_size;
+	}
+
+	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
+
+	mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
+	if (mz == NULL)
+		goto exit_unlock;
+
+	if (rte_eal_has_hugepages()) {
+		startaddr = (void*)mz->addr;
+	} else {
+		/* align memory pool start address on a page boundary */
+		unsigned long addr = (unsigned long)mz->addr;
+		if (addr & (page_size - 1)) {
+			addr += page_size;
+			addr &= ~(page_size - 1);
+		}
+		startaddr = (void*)addr;
+	}
+
+	/* init the mempool structure */
+	mp = startaddr;
+	memset(mp, 0, sizeof(*mp));
+	snprintf(mp->name, sizeof(mp->name), "%s", name);
+	mp->phys_addr = mz->phys_addr;
+	mp->ring = r;
+	mp->size = n;
+	mp->flags = flags;
+	mp->elt_size = objsz.elt_size;
+	mp->header_size = objsz.header_size;
+	mp->trailer_size = objsz.trailer_size;
+	mp->cache_size = cache_size;
+	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
+	mp->private_data_size = private_data_size;
+
+	/* calculate address of the first element for continuous mempool. */
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+		private_data_size;
+	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
+
+	/* populate address translation fields. */
+	mp->pg_num = pg_num;
+	mp->pg_shift = pg_shift;
+	mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask));
+
+	/* mempool elements allocated together with mempool */
+	if (vaddr == NULL) {
+		mp->elt_va_start = (uintptr_t)obj;
+		mp->elt_pa[0] = mp->phys_addr +
+			(mp->elt_va_start - (uintptr_t)mp);
+
+	/* mempool elements in a separate chunk of memory. */
+	} else {
+		mp->elt_va_start = (uintptr_t)vaddr;
+		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
+	}
+
+	mp->elt_va_end = mp->elt_va_start;
+
+	/* call the initializer */
+	if (mp_init)
+		mp_init(mp, mp_init_arg);
+
+	mempool_populate(mp, n, 1, obj_init, obj_init_arg);
+
+	te->data = (void *) mp;
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_INSERT_TAIL(mempool_list, te, next);
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+	rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+
+	return mp;
+
+exit_unlock:
+	rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+	rte_ring_free(r);
+	rte_free(te);
+
+	return NULL;
+}
+
+/* Return the number of entries in the mempool */
+unsigned
+rte_mempool_count(const struct rte_mempool *mp)
+{
+	unsigned count;
+
+	count = rte_ring_count(mp->ring);
+
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	{
+		unsigned lcore_id;
+		if (mp->cache_size == 0)
+			return count;
+
+		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+			count += mp->local_cache[lcore_id].len;
+	}
+#endif
+
+	/*
+	 * due to race condition (access to len is not locked), the
+	 * total can be greater than size... so fix the result
+	 */
+	if (count > mp->size)
+		return mp->size;
+	return count;
+}
+
+/* dump the cache status */
+static unsigned
+rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
+{
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	unsigned lcore_id;
+	unsigned count = 0;
+	unsigned cache_count;
+
+	fprintf(f, "  cache infos:\n");
+	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		cache_count = mp->local_cache[lcore_id].len;
+		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
+		count += cache_count;
+	}
+	fprintf(f, "    total_cache_count=%u\n", count);
+	return count;
+#else
+	RTE_SET_USED(mp);
+	fprintf(f, "  cache disabled\n");
+	return 0;
+#endif
+}
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+/* check cookies before and after objects */
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+struct mempool_audit_arg {
+	const struct rte_mempool *mp;
+	uintptr_t obj_end;
+	uint32_t obj_num;
+};
+
+static void
+mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx)
+{
+	struct mempool_audit_arg *pa = arg;
+	void *obj;
+
+	obj = (char *)start + pa->mp->header_size;
+	pa->obj_end = (uintptr_t)end;
+	pa->obj_num = idx + 1;
+	__mempool_check_cookies(pa->mp, &obj, 1, 2);
+}
+
+static void
+mempool_audit_cookies(const struct rte_mempool *mp)
+{
+	uint32_t elt_sz, num;
+	struct mempool_audit_arg arg;
+
+	elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
+
+	arg.mp = mp;
+	arg.obj_end = mp->elt_va_start;
+	arg.obj_num = 0;
+
+	num = rte_mempool_obj_iter((void *)mp->elt_va_start,
+		mp->size, elt_sz, 1,
+		mp->elt_pa, mp->pg_num, mp->pg_shift,
+		mempool_obj_audit, &arg);
+
+	if (num != mp->size) {
+			rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
+			"iterated only over %u elements\n",
+			mp, mp->size, num);
+	} else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) {
+			rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
+			"last callback va_end: %#tx (%#tx expeceted), "
+			"num of objects: %u (%u expected)\n",
+			mp, mp->size,
+			arg.obj_end, mp->elt_va_end,
+			arg.obj_num, mp->size);
+	}
+}
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic error "-Wcast-qual"
+#endif
+#else
+#define mempool_audit_cookies(mp) do {} while(0)
+#endif
+
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+/* check cookies before and after objects */
+static void
+mempool_audit_cache(const struct rte_mempool *mp)
+{
+	/* check cache size consistency */
+	unsigned lcore_id;
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
+				lcore_id);
+			rte_panic("MEMPOOL: invalid cache len\n");
+		}
+	}
+}
+#else
+#define mempool_audit_cache(mp) do {} while(0)
+#endif
+
+
+/* check the consistency of mempool (size, cookies, ...) */
+void
+rte_mempool_audit(const struct rte_mempool *mp)
+{
+	mempool_audit_cache(mp);
+	mempool_audit_cookies(mp);
+
+	/* For case where mempool DEBUG is not set, and cache size is 0 */
+	RTE_SET_USED(mp);
+}
+
+/* dump the status of the mempool on the console */
+void
+rte_mempool_dump(FILE *f, const struct rte_mempool *mp)
+{
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	struct rte_mempool_debug_stats sum;
+	unsigned lcore_id;
+#endif
+	unsigned common_count;
+	unsigned cache_count;
+
+	RTE_VERIFY(f != NULL);
+	RTE_VERIFY(mp != NULL);
+
+	fprintf(f, "mempool <%s>@%p\n", mp->name, mp);
+	fprintf(f, "  flags=%x\n", mp->flags);
+	fprintf(f, "  ring=<%s>@%p\n", mp->ring->name, mp->ring);
+	fprintf(f, "  phys_addr=0x%" PRIx64 "\n", mp->phys_addr);
+	fprintf(f, "  size=%"PRIu32"\n", mp->size);
+	fprintf(f, "  header_size=%"PRIu32"\n", mp->header_size);
+	fprintf(f, "  elt_size=%"PRIu32"\n", mp->elt_size);
+	fprintf(f, "  trailer_size=%"PRIu32"\n", mp->trailer_size);
+	fprintf(f, "  total_obj_size=%"PRIu32"\n",
+	       mp->header_size + mp->elt_size + mp->trailer_size);
+
+	fprintf(f, "  private_data_size=%"PRIu32"\n", mp->private_data_size);
+	fprintf(f, "  pg_num=%"PRIu32"\n", mp->pg_num);
+	fprintf(f, "  pg_shift=%"PRIu32"\n", mp->pg_shift);
+	fprintf(f, "  pg_mask=%#tx\n", mp->pg_mask);
+	fprintf(f, "  elt_va_start=%#tx\n", mp->elt_va_start);
+	fprintf(f, "  elt_va_end=%#tx\n", mp->elt_va_end);
+	fprintf(f, "  elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]);
+
+	if (mp->size != 0)
+		fprintf(f, "  avg bytes/object=%#Lf\n",
+			(long double)(mp->elt_va_end - mp->elt_va_start) /
+			mp->size);
+
+	cache_count = rte_mempool_dump_cache(f, mp);
+	common_count = rte_ring_count(mp->ring);
+	if ((cache_count + common_count) > mp->size)
+		common_count = mp->size - cache_count;
+	fprintf(f, "  common_pool_count=%u\n", common_count);
+
+	/* sum and dump statistics */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	memset(&sum, 0, sizeof(sum));
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		sum.put_bulk += mp->stats[lcore_id].put_bulk;
+		sum.put_objs += mp->stats[lcore_id].put_objs;
+		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
+		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
+		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
+		sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs;
+	}
+	fprintf(f, "  stats:\n");
+	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
+	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
+	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
+	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
+	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
+	fprintf(f, "    get_fail_objs=%"PRIu64"\n", sum.get_fail_objs);
+#else
+	fprintf(f, "  no statistics available\n");
+#endif
+
+	rte_mempool_audit(mp);
+}
+
+/* dump the status of all mempools on the console */
+void
+rte_mempool_list_dump(FILE *f)
+{
+	const struct rte_mempool *mp = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_mempool_list *mempool_list;
+
+	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+
+	rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
+	TAILQ_FOREACH(te, mempool_list, next) {
+		mp = (struct rte_mempool *) te->data;
+		rte_mempool_dump(f, mp);
+	}
+
+	rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+}
+
+/* search a mempool from its name */
+struct rte_mempool *
+rte_mempool_lookup(const char *name)
+{
+	struct rte_mempool *mp = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_mempool_list *mempool_list;
+
+	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+
+	rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
+	TAILQ_FOREACH(te, mempool_list, next) {
+		mp = (struct rte_mempool *) te->data;
+		if (strncmp(name, mp->name, RTE_MEMPOOL_NAMESIZE) == 0)
+			break;
+	}
+
+	rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return mp;
+}
+
+void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *),
+		      void *arg)
+{
+	struct rte_tailq_entry *te = NULL;
+	struct rte_mempool_list *mempool_list;
+
+	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+
+	rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
+	TAILQ_FOREACH(te, mempool_list, next) {
+		(*func)((struct rte_mempool *) te->data, arg);
+	}
+
+	rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
new file mode 100644
index 00000000..9745bf0d
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool.h
@@ -0,0 +1,1408 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMPOOL_H_
+#define _RTE_MEMPOOL_H_
+
+/**
+ * @file
+ * RTE Mempool.
+ *
+ * A memory pool is an allocator of fixed-size object. It is
+ * identified by its name, and uses a ring to store free objects. It
+ * provides some other optional services, like a per-core object
+ * cache, and an alignment helper to ensure that objects are padded
+ * to spread them equally on all RAM channels, ranks, and so on.
+ *
+ * Objects owned by a mempool should never be added in another
+ * mempool. When an object is freed using rte_mempool_put() or
+ * equivalent, the object data is not modified; the user can save some
+ * meta-data in the object data and retrieve them when allocating a
+ * new object.
+ *
+ * Note: the mempool implementation is not preemptable. A lcore must
+ * not be interrupted by another task that uses the same mempool
+ * (because it uses a ring which is not preemptable). Also, mempool
+ * functions must not be used outside the DPDK environment: for
+ * example, in linuxapp environment, a thread that is not created by
+ * the EAL must not use mempools. This is due to the per-lcore cache
+ * that won't work as rte_lcore_id() will not return a correct value.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MEMPOOL_HEADER_COOKIE1  0xbadbadbadadd2e55ULL /**< Header cookie. */
+#define RTE_MEMPOOL_HEADER_COOKIE2  0xf2eef2eedadd2e55ULL /**< Header cookie. */
+#define RTE_MEMPOOL_TRAILER_COOKIE  0xadd2e55badbadbadULL /**< Trailer cookie.*/
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+/**
+ * A structure that stores the mempool statistics (per-lcore).
+ */
+struct rte_mempool_debug_stats {
+	uint64_t put_bulk;         /**< Number of puts. */
+	uint64_t put_objs;         /**< Number of objects successfully put. */
+	uint64_t get_success_bulk; /**< Successful allocation number. */
+	uint64_t get_success_objs; /**< Objects successfully allocated. */
+	uint64_t get_fail_bulk;    /**< Failed allocation number. */
+	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
+} __rte_cache_aligned;
+#endif
+
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+/**
+ * A structure that stores a per-core object cache.
+ */
+struct rte_mempool_cache {
+	unsigned len; /**< Cache len */
+	/*
+	 * Cache is allocated to this size to allow it to overflow in certain
+	 * cases to avoid needless emptying of cache.
+	 */
+	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
+} __rte_cache_aligned;
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+
+/**
+ * A structure that stores the size of mempool elements.
+ */
+struct rte_mempool_objsz {
+	uint32_t elt_size;     /**< Size of an element. */
+	uint32_t header_size;  /**< Size of header (before elt). */
+	uint32_t trailer_size; /**< Size of trailer (after elt). */
+	uint32_t total_size;
+	/**< Total size of an object (header + elt + trailer). */
+};
+
+#define RTE_MEMPOOL_NAMESIZE 32 /**< Maximum length of a memory pool. */
+#define RTE_MEMPOOL_MZ_PREFIX "MP_"
+
+/* "MP_<name>" */
+#define	RTE_MEMPOOL_MZ_FORMAT	RTE_MEMPOOL_MZ_PREFIX "%s"
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+
+/* "<name>_MP_elt" */
+#define	RTE_MEMPOOL_OBJ_NAME	"%s_" RTE_MEMPOOL_MZ_PREFIX "elt"
+
+#else
+
+#define	RTE_MEMPOOL_OBJ_NAME	RTE_MEMPOOL_MZ_FORMAT
+
+#endif /* RTE_LIBRTE_XEN_DOM0 */
+
+#define	MEMPOOL_PG_SHIFT_MAX	(sizeof(uintptr_t) * CHAR_BIT - 1)
+
+/** Mempool over one chunk of physically continuous memory */
+#define	MEMPOOL_PG_NUM_DEFAULT	1
+
+#ifndef RTE_MEMPOOL_ALIGN
+#define RTE_MEMPOOL_ALIGN	RTE_CACHE_LINE_SIZE
+#endif
+
+#define RTE_MEMPOOL_ALIGN_MASK	(RTE_MEMPOOL_ALIGN - 1)
+
+/**
+ * Mempool object header structure
+ *
+ * Each object stored in mempools are prefixed by this header structure,
+ * it allows to retrieve the mempool pointer from the object. When debug
+ * is enabled, a cookie is also added in this structure preventing
+ * corruptions and double-frees.
+ */
+struct rte_mempool_objhdr {
+	struct rte_mempool *mp;          /**< The mempool owning the object. */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	uint64_t cookie;                 /**< Debug cookie. */
+#endif
+};
+
+/**
+ * Mempool object trailer structure
+ *
+ * In debug mode, each object stored in mempools are suffixed by this
+ * trailer structure containing a cookie preventing memory corruptions.
+ */
+struct rte_mempool_objtlr {
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	uint64_t cookie;                 /**< Debug cookie. */
+#endif
+};
+
+/**
+ * The RTE mempool structure.
+ */
+struct rte_mempool {
+	char name[RTE_MEMPOOL_NAMESIZE]; /**< Name of mempool. */
+	struct rte_ring *ring;           /**< Ring to store objects. */
+	phys_addr_t phys_addr;           /**< Phys. addr. of mempool struct. */
+	int flags;                       /**< Flags of the mempool. */
+	uint32_t size;                   /**< Size of the mempool. */
+	uint32_t cache_size;             /**< Size of per-lcore local cache. */
+	uint32_t cache_flushthresh;
+	/**< Threshold before we flush excess elements. */
+
+	uint32_t elt_size;               /**< Size of an element. */
+	uint32_t header_size;            /**< Size of header (before elt). */
+	uint32_t trailer_size;           /**< Size of trailer (after elt). */
+
+	unsigned private_data_size;      /**< Size of private data. */
+
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	/** Per-lcore local cache. */
+	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
+#endif
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	/** Per-lcore statistics. */
+	struct rte_mempool_debug_stats stats[RTE_MAX_LCORE];
+#endif
+
+	/* Address translation support, starts from next cache line. */
+
+	/** Number of elements in the elt_pa array. */
+	uint32_t    pg_num __rte_cache_aligned;
+	uint32_t    pg_shift;     /**< LOG2 of the physical pages. */
+	uintptr_t   pg_mask;      /**< physical page mask value. */
+	uintptr_t   elt_va_start;
+	/**< Virtual address of the first mempool object. */
+	uintptr_t   elt_va_end;
+	/**< Virtual address of the <size + 1> mempool object. */
+	phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT];
+	/**< Array of physical page addresses for the mempool objects buffer. */
+
+}  __rte_cache_aligned;
+
+#define MEMPOOL_F_NO_SPREAD      0x0001 /**< Do not spread in memory. */
+#define MEMPOOL_F_NO_CACHE_ALIGN 0x0002 /**< Do not align objs on cache lines.*/
+#define MEMPOOL_F_SP_PUT         0x0004 /**< Default put is "single-producer".*/
+#define MEMPOOL_F_SC_GET         0x0008 /**< Default get is "single-consumer".*/
+
+/**
+ * @internal When debug is enabled, store some statistics.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param name
+ *   Name of the statistics field to increment in the memory pool.
+ * @param n
+ *   Number to add to the object-oriented statistics.
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
+		unsigned __lcore_id = rte_lcore_id();           \
+		if (__lcore_id < RTE_MAX_LCORE) {               \
+			mp->stats[__lcore_id].name##_objs += n;	\
+			mp->stats[__lcore_id].name##_bulk += 1;	\
+		}                                               \
+	} while(0)
+#else
+#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
+#endif
+
+/**
+ * Calculate the size of the mempool header.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param pgn
+ *   Number of pages used to store mempool objects.
+ */
+#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
+	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+
+/**
+ * Return true if the whole mempool is in contiguous memory.
+ */
+#define	MEMPOOL_IS_CONTIG(mp)                      \
+	((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \
+	(mp)->phys_addr == (mp)->elt_pa[0])
+
+/* return the header of a mempool object (internal) */
+static inline struct rte_mempool_objhdr *__mempool_get_header(void *obj)
+{
+	return (struct rte_mempool_objhdr *)RTE_PTR_SUB(obj, sizeof(struct rte_mempool_objhdr));
+}
+
+/**
+ * Return a pointer to the mempool owning this object.
+ *
+ * @param obj
+ *   An object that is owned by a pool. If this is not the case,
+ *   the behavior is undefined.
+ * @return
+ *   A pointer to the mempool structure.
+ */
+static inline struct rte_mempool *rte_mempool_from_obj(void *obj)
+{
+	struct rte_mempool_objhdr *hdr = __mempool_get_header(obj);
+	return hdr->mp;
+}
+
+/* return the trailer of a mempool object (internal) */
+static inline struct rte_mempool_objtlr *__mempool_get_trailer(void *obj)
+{
+	struct rte_mempool *mp = rte_mempool_from_obj(obj);
+	return (struct rte_mempool_objtlr *)RTE_PTR_ADD(obj, mp->elt_size);
+}
+
+/**
+ * @internal Check and update cookies or panic.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param obj_table_const
+ *   Pointer to a table of void * pointers (objects).
+ * @param n
+ *   Index of object in object table.
+ * @param free
+ *   - 0: object is supposed to be allocated, mark it as free
+ *   - 1: object is supposed to be free, mark it as allocated
+ *   - 2: just check that cookie is valid (free or allocated)
+ */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+static inline void __mempool_check_cookies(const struct rte_mempool *mp,
+					   void * const *obj_table_const,
+					   unsigned n, int free)
+{
+	struct rte_mempool_objhdr *hdr;
+	struct rte_mempool_objtlr *tlr;
+	uint64_t cookie;
+	void *tmp;
+	void *obj;
+	void **obj_table;
+
+	/* Force to drop the "const" attribute. This is done only when
+	 * DEBUG is enabled */
+	tmp = (void *) obj_table_const;
+	obj_table = (void **) tmp;
+
+	while (n--) {
+		obj = obj_table[n];
+
+		if (rte_mempool_from_obj(obj) != mp)
+			rte_panic("MEMPOOL: object is owned by another "
+				  "mempool\n");
+
+		hdr = __mempool_get_header(obj);
+		cookie = hdr->cookie;
+
+		if (free == 0) {
+			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1) {
+				rte_log_set_history(0);
+				RTE_LOG(CRIT, MEMPOOL,
+					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+					obj, (const void *) mp, cookie);
+				rte_panic("MEMPOOL: bad header cookie (put)\n");
+			}
+			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
+		}
+		else if (free == 1) {
+			if (cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
+				rte_log_set_history(0);
+				RTE_LOG(CRIT, MEMPOOL,
+					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+					obj, (const void *) mp, cookie);
+				rte_panic("MEMPOOL: bad header cookie (get)\n");
+			}
+			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE1;
+		}
+		else if (free == 2) {
+			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1 &&
+			    cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
+				rte_log_set_history(0);
+				RTE_LOG(CRIT, MEMPOOL,
+					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+					obj, (const void *) mp, cookie);
+				rte_panic("MEMPOOL: bad header cookie (audit)\n");
+			}
+		}
+		tlr = __mempool_get_trailer(obj);
+		cookie = tlr->cookie;
+		if (cookie != RTE_MEMPOOL_TRAILER_COOKIE) {
+			rte_log_set_history(0);
+			RTE_LOG(CRIT, MEMPOOL,
+				"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+				obj, (const void *) mp, cookie);
+			rte_panic("MEMPOOL: bad trailer cookie\n");
+		}
+	}
+}
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic error "-Wcast-qual"
+#endif
+#else
+#define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0)
+#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */
+
+/**
+ * A mempool object iterator callback function.
+ */
+typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/,
+	void * /*obj_start*/,
+	void * /*obj_end*/,
+	uint32_t /*obj_index */);
+
+/**
+ * Call a function for each mempool object in a memory chunk
+ *
+ * Iterate across objects of the given size and alignment in the
+ * provided chunk of memory. The given memory buffer can consist of
+ * disjointed physical pages.
+ *
+ * For each object, call the provided callback (if any). This function
+ * is used to populate a mempool, or walk through all the elements of a
+ * mempool, or estimate how many elements of the given size could be
+ * created in the given memory buffer.
+ *
+ * @param vaddr
+ *   Virtual address of the memory buffer.
+ * @param elt_num
+ *   Maximum number of objects to iterate through.
+ * @param elt_sz
+ *   Size of each object.
+ * @param align
+ *   Alignment of each object.
+ * @param paddr
+ *   Array of physical addresses of the pages that comprises given memory
+ *   buffer.
+ * @param pg_num
+ *   Number of elements in the paddr array.
+ * @param pg_shift
+ *   LOG2 of the physical pages size.
+ * @param obj_iter
+ *   Object iterator callback function (could be NULL).
+ * @param obj_iter_arg
+ *   User defined parameter for the object iterator callback function.
+ *
+ * @return
+ *   Number of objects iterated through.
+ */
+uint32_t rte_mempool_obj_iter(void *vaddr,
+	uint32_t elt_num, size_t elt_sz, size_t align,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+	rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg);
+
+/**
+ * An object constructor callback function for mempool.
+ *
+ * Arguments are the mempool, the opaque pointer given by the user in
+ * rte_mempool_create(), the pointer to the element and the index of
+ * the element in the pool.
+ */
+typedef void (rte_mempool_obj_ctor_t)(struct rte_mempool *, void *,
+				      void *, unsigned);
+
+/**
+ * A mempool constructor callback function.
+ *
+ * Arguments are the mempool and the opaque pointer given by the user in
+ * rte_mempool_create().
+ */
+typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
+
+/**
+ * Create a new mempool named *name* in memory.
+ *
+ * This function uses ``memzone_reserve()`` to allocate memory. The
+ * pool contains n elements of elt_size. Its size is set to n.
+ * All elements of the mempool are allocated together with the mempool header,
+ * in one physically continuous chunk of memory.
+ *
+ * @param name
+ *   The name of the mempool.
+ * @param n
+ *   The number of elements in the mempool. The optimum size (in terms of
+ *   memory usage) for a mempool is when n is a power of two minus one:
+ *   n = (2^q - 1).
+ * @param elt_size
+ *   The size of each element.
+ * @param cache_size
+ *   If cache_size is non-zero, the rte_mempool library will try to
+ *   limit the accesses to the common lockless pool, by maintaining a
+ *   per-lcore object cache. This argument must be lower or equal to
+ *   CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE and n / 1.5. It is advised to choose
+ *   cache_size to have "n modulo cache_size == 0": if this is
+ *   not the case, some elements will always stay in the pool and will
+ *   never be used. The access to the per-lcore table is of course
+ *   faster than the multi-producer/consumer pool. The cache can be
+ *   disabled if the cache_size argument is set to 0; it can be useful to
+ *   avoid losing objects in cache. Note that even if not used, the
+ *   memory space for cache is always reserved in a mempool structure,
+ *   except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ * @param private_data_size
+ *   The size of the private data appended after the mempool
+ *   structure. This is useful for storing some private data after the
+ *   mempool structure, as is done for rte_mbuf_pool for example.
+ * @param mp_init
+ *   A function pointer that is called for initialization of the pool,
+ *   before object initialization. The user can initialize the private
+ *   data in this function if needed. This parameter can be NULL if
+ *   not needed.
+ * @param mp_init_arg
+ *   An opaque pointer to data that can be used in the mempool
+ *   constructor function.
+ * @param obj_init
+ *   A function pointer that is called for each object at
+ *   initialization of the pool. The user can set some meta data in
+ *   objects if needed. This parameter can be NULL if not needed.
+ *   The obj_init() function takes the mempool pointer, the init_arg,
+ *   the object pointer and the object number as parameters.
+ * @param obj_init_arg
+ *   An opaque pointer to data that can be used as an argument for
+ *   each call to the object constructor function.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in the case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The *flags* arguments is an OR of following flags:
+ *   - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
+ *     between channels in RAM: the pool allocator will add padding
+ *     between objects depending on the hardware configuration. See
+ *     Memory alignment constraints for details. If this flag is set,
+ *     the allocator will just align them to a cache line.
+ *   - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
+ *     cache-aligned. This flag removes this constraint, and no
+ *     padding will be present between objects. This flag implies
+ *     MEMPOOL_F_NO_SPREAD.
+ *   - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
+ *     when using rte_mempool_put() or rte_mempool_put_bulk() is
+ *     "single-producer". Otherwise, it is "multi-producers".
+ *   - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
+ *     when using rte_mempool_get() or rte_mempool_get_bulk() is
+ *     "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ *   The pointer to the new allocated mempool, on success. NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - cache size provided is too large
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
+		   unsigned cache_size, unsigned private_data_size,
+		   rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+		   rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		   int socket_id, unsigned flags);
+
+/**
+ * Create a new mempool named *name* in memory.
+ *
+ * This function uses ``memzone_reserve()`` to allocate memory. The
+ * pool contains n elements of elt_size. Its size is set to n.
+ * Depending on the input parameters, mempool elements can be either allocated
+ * together with the mempool header, or an externally provided memory buffer
+ * could be used to store mempool objects. In later case, that external
+ * memory buffer can consist of set of disjoint physical pages.
+ *
+ * @param name
+ *   The name of the mempool.
+ * @param n
+ *   The number of elements in the mempool. The optimum size (in terms of
+ *   memory usage) for a mempool is when n is a power of two minus one:
+ *   n = (2^q - 1).
+ * @param elt_size
+ *   The size of each element.
+ * @param cache_size
+ *   If cache_size is non-zero, the rte_mempool library will try to
+ *   limit the accesses to the common lockless pool, by maintaining a
+ *   per-lcore object cache. This argument must be lower or equal to
+ *   CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
+ *   cache_size to have "n modulo cache_size == 0": if this is
+ *   not the case, some elements will always stay in the pool and will
+ *   never be used. The access to the per-lcore table is of course
+ *   faster than the multi-producer/consumer pool. The cache can be
+ *   disabled if the cache_size argument is set to 0; it can be useful to
+ *   avoid losing objects in cache. Note that even if not used, the
+ *   memory space for cache is always reserved in a mempool structure,
+ *   except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ * @param private_data_size
+ *   The size of the private data appended after the mempool
+ *   structure. This is useful for storing some private data after the
+ *   mempool structure, as is done for rte_mbuf_pool for example.
+ * @param mp_init
+ *   A function pointer that is called for initialization of the pool,
+ *   before object initialization. The user can initialize the private
+ *   data in this function if needed. This parameter can be NULL if
+ *   not needed.
+ * @param mp_init_arg
+ *   An opaque pointer to data that can be used in the mempool
+ *   constructor function.
+ * @param obj_init
+ *   A function pointer that is called for each object at
+ *   initialization of the pool. The user can set some meta data in
+ *   objects if needed. This parameter can be NULL if not needed.
+ *   The obj_init() function takes the mempool pointer, the init_arg,
+ *   the object pointer and the object number as parameters.
+ * @param obj_init_arg
+ *   An opaque pointer to data that can be used as an argument for
+ *   each call to the object constructor function.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in the case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The *flags* arguments is an OR of following flags:
+ *   - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
+ *     between channels in RAM: the pool allocator will add padding
+ *     between objects depending on the hardware configuration. See
+ *     Memory alignment constraints for details. If this flag is set,
+ *     the allocator will just align them to a cache line.
+ *   - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
+ *     cache-aligned. This flag removes this constraint, and no
+ *     padding will be present between objects. This flag implies
+ *     MEMPOOL_F_NO_SPREAD.
+ *   - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
+ *     when using rte_mempool_put() or rte_mempool_put_bulk() is
+ *     "single-producer". Otherwise, it is "multi-producers".
+ *   - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
+ *     when using rte_mempool_get() or rte_mempool_get_bulk() is
+ *     "single-consumer". Otherwise, it is "multi-consumers".
+ * @param vaddr
+ *   Virtual address of the externally allocated memory buffer.
+ *   Will be used to store mempool objects.
+ * @param paddr
+ *   Array of physical addresses of the pages that comprises given memory
+ *   buffer.
+ * @param pg_num
+ *   Number of elements in the paddr array.
+ * @param pg_shift
+ *   LOG2 of the physical pages size.
+ * @return
+ *   The pointer to the new allocated mempool, on success. NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - cache size provided is too large
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
+		unsigned cache_size, unsigned private_data_size,
+		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+		rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		int socket_id, unsigned flags, void *vaddr,
+		const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
+
+/**
+ * Create a new mempool named *name* in memory on Xen Dom0.
+ *
+ * This function uses ``rte_mempool_xmem_create()`` to allocate memory. The
+ * pool contains n elements of elt_size. Its size is set to n.
+ * All elements of the mempool are allocated together with the mempool header,
+ * and memory buffer can consist of set of disjoint physical pages.
+ *
+ * @param name
+ *   The name of the mempool.
+ * @param n
+ *   The number of elements in the mempool. The optimum size (in terms of
+ *   memory usage) for a mempool is when n is a power of two minus one:
+ *   n = (2^q - 1).
+ * @param elt_size
+ *   The size of each element.
+ * @param cache_size
+ *   If cache_size is non-zero, the rte_mempool library will try to
+ *   limit the accesses to the common lockless pool, by maintaining a
+ *   per-lcore object cache. This argument must be lower or equal to
+ *   CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
+ *   cache_size to have "n modulo cache_size == 0": if this is
+ *   not the case, some elements will always stay in the pool and will
+ *   never be used. The access to the per-lcore table is of course
+ *   faster than the multi-producer/consumer pool. The cache can be
+ *   disabled if the cache_size argument is set to 0; it can be useful to
+ *   avoid losing objects in cache. Note that even if not used, the
+ *   memory space for cache is always reserved in a mempool structure,
+ *   except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ * @param private_data_size
+ *   The size of the private data appended after the mempool
+ *   structure. This is useful for storing some private data after the
+ *   mempool structure, as is done for rte_mbuf_pool for example.
+ * @param mp_init
+ *   A function pointer that is called for initialization of the pool,
+ *   before object initialization. The user can initialize the private
+ *   data in this function if needed. This parameter can be NULL if
+ *   not needed.
+ * @param mp_init_arg
+ *   An opaque pointer to data that can be used in the mempool
+ *   constructor function.
+ * @param obj_init
+ *   A function pointer that is called for each object at
+ *   initialization of the pool. The user can set some meta data in
+ *   objects if needed. This parameter can be NULL if not needed.
+ *   The obj_init() function takes the mempool pointer, the init_arg,
+ *   the object pointer and the object number as parameters.
+ * @param obj_init_arg
+ *   An opaque pointer to data that can be used as an argument for
+ *   each call to the object constructor function.
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in the case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   The *flags* arguments is an OR of following flags:
+ *   - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
+ *     between channels in RAM: the pool allocator will add padding
+ *     between objects depending on the hardware configuration. See
+ *     Memory alignment constraints for details. If this flag is set,
+ *     the allocator will just align them to a cache line.
+ *   - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
+ *     cache-aligned. This flag removes this constraint, and no
+ *     padding will be present between objects. This flag implies
+ *     MEMPOOL_F_NO_SPREAD.
+ *   - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
+ *     when using rte_mempool_put() or rte_mempool_put_bulk() is
+ *     "single-producer". Otherwise, it is "multi-producers".
+ *   - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
+ *     when using rte_mempool_get() or rte_mempool_get_bulk() is
+ *     "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ *   The pointer to the new allocated mempool, on success. NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - cache size provided is too large
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size,
+		unsigned cache_size, unsigned private_data_size,
+		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+		rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		int socket_id, unsigned flags);
+
+
+/**
+ * Dump the status of the mempool to the console.
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @param mp
+ *   A pointer to the mempool structure.
+ */
+void rte_mempool_dump(FILE *f, const struct rte_mempool *mp);
+
+/**
+ * @internal Put several objects back in the mempool; used internally.
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to store back in the mempool, must be strictly
+ *   positive.
+ * @param is_mp
+ *   Mono-producer (0) or multi-producers (1).
+ */
+static inline void __attribute__((always_inline))
+__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
+		    unsigned n, int is_mp)
+{
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	struct rte_mempool_cache *cache;
+	uint32_t index;
+	void **cache_objs;
+	unsigned lcore_id = rte_lcore_id();
+	uint32_t cache_size = mp->cache_size;
+	uint32_t flushthresh = mp->cache_flushthresh;
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+
+	/* increment stat now, adding in mempool always success */
+	__MEMPOOL_STAT_ADD(mp, put, n);
+
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	/* cache is not enabled or single producer or non-EAL thread */
+	if (unlikely(cache_size == 0 || is_mp == 0 ||
+		     lcore_id >= RTE_MAX_LCORE))
+		goto ring_enqueue;
+
+	/* Go straight to ring if put would overflow mem allocated for cache */
+	if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE))
+		goto ring_enqueue;
+
+	cache = &mp->local_cache[lcore_id];
+	cache_objs = &cache->objs[cache->len];
+
+	/*
+	 * The cache follows the following algorithm
+	 *   1. Add the objects to the cache
+	 *   2. Anything greater than the cache min value (if it crosses the
+	 *   cache flush threshold) is flushed to the ring.
+	 */
+
+	/* Add elements back into the cache */
+	for (index = 0; index < n; ++index, obj_table++)
+		cache_objs[index] = *obj_table;
+
+	cache->len += n;
+
+	if (cache->len >= flushthresh) {
+		rte_ring_mp_enqueue_bulk(mp->ring, &cache->objs[cache_size],
+				cache->len - cache_size);
+		cache->len = cache_size;
+	}
+
+	return;
+
+ring_enqueue:
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+
+	/* push remaining objects in ring */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	if (is_mp) {
+		if (rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n) < 0)
+			rte_panic("cannot put objects in mempool\n");
+	}
+	else {
+		if (rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n) < 0)
+			rte_panic("cannot put objects in mempool\n");
+	}
+#else
+	if (is_mp)
+		rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n);
+	else
+		rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n);
+#endif
+}
+
+
+/**
+ * Put several objects back in the mempool (multi-producers safe).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the mempool from the obj_table.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
+			unsigned n)
+{
+	__mempool_check_cookies(mp, obj_table, n, 0);
+	__mempool_put_bulk(mp, obj_table, n, 1);
+}
+
+/**
+ * Put several objects back in the mempool (NOT multi-producers safe).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the mempool from obj_table.
+ */
+static inline void
+rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
+			unsigned n)
+{
+	__mempool_check_cookies(mp, obj_table, n, 0);
+	__mempool_put_bulk(mp, obj_table, n, 0);
+}
+
+/**
+ * Put several objects back in the mempool.
+ *
+ * This function calls the multi-producer or the single-producer
+ * version depending on the default behavior that was specified at
+ * mempool creation time (see flags).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the mempool from obj_table.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
+		     unsigned n)
+{
+	__mempool_check_cookies(mp, obj_table, n, 0);
+	__mempool_put_bulk(mp, obj_table, n, !(mp->flags & MEMPOOL_F_SP_PUT));
+}
+
+/**
+ * Put one object in the mempool (multi-producers safe).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj
+ *   A pointer to the object to be added.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
+{
+	rte_mempool_mp_put_bulk(mp, &obj, 1);
+}
+
+/**
+ * Put one object back in the mempool (NOT multi-producers safe).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj
+ *   A pointer to the object to be added.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_sp_put(struct rte_mempool *mp, void *obj)
+{
+	rte_mempool_sp_put_bulk(mp, &obj, 1);
+}
+
+/**
+ * Put one object back in the mempool.
+ *
+ * This function calls the multi-producer or the single-producer
+ * version depending on the default behavior that was specified at
+ * mempool creation time (see flags).
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj
+ *   A pointer to the object to be added.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_put(struct rte_mempool *mp, void *obj)
+{
+	rte_mempool_put_bulk(mp, &obj, 1);
+}
+
+/**
+ * @internal Get several objects from the mempool; used internally.
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to get, must be strictly positive.
+ * @param is_mc
+ *   Mono-consumer (0) or multi-consumers (1).
+ * @return
+ *   - >=0: Success; number of objects supplied.
+ *   - <0: Error; code of ring dequeue function.
+ */
+static inline int __attribute__((always_inline))
+__mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
+		   unsigned n, int is_mc)
+{
+	int ret;
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+	struct rte_mempool_cache *cache;
+	uint32_t index, len;
+	void **cache_objs;
+	unsigned lcore_id = rte_lcore_id();
+	uint32_t cache_size = mp->cache_size;
+
+	/* cache is not enabled or single consumer */
+	if (unlikely(cache_size == 0 || is_mc == 0 ||
+		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
+		goto ring_dequeue;
+
+	cache = &mp->local_cache[lcore_id];
+	cache_objs = cache->objs;
+
+	/* Can this be satisfied from the cache? */
+	if (cache->len < n) {
+		/* No. Backfill the cache first, and then fill from it */
+		uint32_t req = n + (cache_size - cache->len);
+
+		/* How many do we require i.e. number to fill the cache + the request */
+		ret = rte_ring_mc_dequeue_bulk(mp->ring, &cache->objs[cache->len], req);
+		if (unlikely(ret < 0)) {
+			/*
+			 * In the offchance that we are buffer constrained,
+			 * where we are not able to allocate cache + n, go to
+			 * the ring directly. If that fails, we are truly out of
+			 * buffers.
+			 */
+			goto ring_dequeue;
+		}
+
+		cache->len += req;
+	}
+
+	/* Now fill in the response ... */
+	for (index = 0, len = cache->len - 1; index < n; ++index, len--, obj_table++)
+		*obj_table = cache_objs[len];
+
+	cache->len -= n;
+
+	__MEMPOOL_STAT_ADD(mp, get_success, n);
+
+	return 0;
+
+ring_dequeue:
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+
+	/* get remaining objects from ring */
+	if (is_mc)
+		ret = rte_ring_mc_dequeue_bulk(mp->ring, obj_table, n);
+	else
+		ret = rte_ring_sc_dequeue_bulk(mp->ring, obj_table, n);
+
+	if (ret < 0)
+		__MEMPOOL_STAT_ADD(mp, get_fail, n);
+	else
+		__MEMPOOL_STAT_ADD(mp, get_success, n);
+
+	return ret;
+}
+
+/**
+ * Get several objects from the mempool (multi-consumers safe).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to get from mempool to obj_table.
+ * @return
+ *   - 0: Success; objects taken.
+ *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
+ */
+static inline int __attribute__((always_inline))
+rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
+{
+	int ret;
+	ret = __mempool_get_bulk(mp, obj_table, n, 1);
+	if (ret == 0)
+		__mempool_check_cookies(mp, obj_table, n, 1);
+	return ret;
+}
+
+/**
+ * Get several objects from the mempool (NOT multi-consumers safe).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to get from the mempool to obj_table.
+ * @return
+ *   - 0: Success; objects taken.
+ *   - -ENOENT: Not enough entries in the mempool; no object is
+ *     retrieved.
+ */
+static inline int __attribute__((always_inline))
+rte_mempool_sc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
+{
+	int ret;
+	ret = __mempool_get_bulk(mp, obj_table, n, 0);
+	if (ret == 0)
+		__mempool_check_cookies(mp, obj_table, n, 1);
+	return ret;
+}
+
+/**
+ * Get several objects from the mempool.
+ *
+ * This function calls the multi-consumers or the single-consumer
+ * version, depending on the default behaviour that was specified at
+ * mempool creation time (see flags).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to get from the mempool to obj_table.
+ * @return
+ *   - 0: Success; objects taken
+ *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
+ */
+static inline int __attribute__((always_inline))
+rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
+{
+	int ret;
+	ret = __mempool_get_bulk(mp, obj_table, n,
+				 !(mp->flags & MEMPOOL_F_SC_GET));
+	if (ret == 0)
+		__mempool_check_cookies(mp, obj_table, n, 1);
+	return ret;
+}
+
+/**
+ * Get one object from the mempool (multi-consumers safe).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_p
+ *   A pointer to a void * pointer (object) that will be filled.
+ * @return
+ *   - 0: Success; objects taken.
+ *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
+ */
+static inline int __attribute__((always_inline))
+rte_mempool_mc_get(struct rte_mempool *mp, void **obj_p)
+{
+	return rte_mempool_mc_get_bulk(mp, obj_p, 1);
+}
+
+/**
+ * Get one object from the mempool (NOT multi-consumers safe).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_p
+ *   A pointer to a void * pointer (object) that will be filled.
+ * @return
+ *   - 0: Success; objects taken.
+ *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
+ */
+static inline int __attribute__((always_inline))
+rte_mempool_sc_get(struct rte_mempool *mp, void **obj_p)
+{
+	return rte_mempool_sc_get_bulk(mp, obj_p, 1);
+}
+
+/**
+ * Get one object from the mempool.
+ *
+ * This function calls the multi-consumers or the single-consumer
+ * version, depending on the default behavior that was specified at
+ * mempool creation (see flags).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_p
+ *   A pointer to a void * pointer (object) that will be filled.
+ * @return
+ *   - 0: Success; objects taken.
+ *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
+ */
+static inline int __attribute__((always_inline))
+rte_mempool_get(struct rte_mempool *mp, void **obj_p)
+{
+	return rte_mempool_get_bulk(mp, obj_p, 1);
+}
+
+/**
+ * Return the number of entries in the mempool.
+ *
+ * When cache is enabled, this function has to browse the length of
+ * all lcores, so it should not be used in a data path, but only for
+ * debug purposes.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   The number of entries in the mempool.
+ */
+unsigned rte_mempool_count(const struct rte_mempool *mp);
+
+/**
+ * Return the number of free entries in the mempool ring.
+ * i.e. how many entries can be freed back to the mempool.
+ *
+ * NOTE: This corresponds to the number of elements *allocated* from the
+ * memory pool, not the number of elements in the pool itself. To count
+ * the number elements currently available in the pool, use "rte_mempool_count"
+ *
+ * When cache is enabled, this function has to browse the length of
+ * all lcores, so it should not be used in a data path, but only for
+ * debug purposes.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   The number of free entries in the mempool.
+ */
+static inline unsigned
+rte_mempool_free_count(const struct rte_mempool *mp)
+{
+	return mp->size - rte_mempool_count(mp);
+}
+
+/**
+ * Test if the mempool is full.
+ *
+ * When cache is enabled, this function has to browse the length of all
+ * lcores, so it should not be used in a data path, but only for debug
+ * purposes.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   - 1: The mempool is full.
+ *   - 0: The mempool is not full.
+ */
+static inline int
+rte_mempool_full(const struct rte_mempool *mp)
+{
+	return !!(rte_mempool_count(mp) == mp->size);
+}
+
+/**
+ * Test if the mempool is empty.
+ *
+ * When cache is enabled, this function has to browse the length of all
+ * lcores, so it should not be used in a data path, but only for debug
+ * purposes.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   - 1: The mempool is empty.
+ *   - 0: The mempool is not empty.
+ */
+static inline int
+rte_mempool_empty(const struct rte_mempool *mp)
+{
+	return !!(rte_mempool_count(mp) == 0);
+}
+
+/**
+ * Return the physical address of elt, which is an element of the pool mp.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param elt
+ *   A pointer (virtual address) to the element of the pool.
+ * @return
+ *   The physical address of the elt element.
+ */
+static inline phys_addr_t
+rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt)
+{
+	if (rte_eal_has_hugepages()) {
+		uintptr_t off;
+
+		off = (const char *)elt - (const char *)mp->elt_va_start;
+		return mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask);
+	} else {
+		/*
+		 * If huge pages are disabled, we cannot assume the
+		 * memory region to be physically contiguous.
+		 * Lookup for each element.
+		 */
+		return rte_mem_virt2phy(elt);
+	}
+}
+
+/**
+ * Check the consistency of mempool objects.
+ *
+ * Verify the coherency of fields in the mempool structure. Also check
+ * that the cookies of mempool objects (even the ones that are not
+ * present in pool) have a correct value. If not, a panic will occur.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ */
+void rte_mempool_audit(const struct rte_mempool *mp);
+
+/**
+ * Return a pointer to the private data in an mempool structure.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   A pointer to the private data.
+ */
+static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
+{
+	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+}
+
+/**
+ * Dump the status of all mempools on the console
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_mempool_list_dump(FILE *f);
+
+/**
+ * Search a mempool from its name
+ *
+ * @param name
+ *   The name of the mempool.
+ * @return
+ *   The pointer to the mempool matching the name, or NULL if not found.
+ *   NULL on error
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ *
+ */
+struct rte_mempool *rte_mempool_lookup(const char *name);
+
+/**
+ * Get the header, trailer and total size of a mempool element.
+ *
+ * Given a desired size of the mempool element and mempool flags,
+ * calculates header, trailer, body and total sizes of the mempool object.
+ *
+ * @param elt_size
+ *   The size of each element.
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Consult rte_mempool_create() for more information about possible values.
+ *   The size of each element.
+ * @param sz
+ *   The calculated detailed size the mempool object. May be NULL.
+ * @return
+ *   Total size of the mempool object.
+ */
+uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
+	struct rte_mempool_objsz *sz);
+
+/**
+ * Get the size of memory required to store mempool elements.
+ *
+ * Calculate the maximum amount of memory required to store given number
+ * of objects. Assume that the memory buffer will be aligned at page
+ * boundary.
+ *
+ * Note that if object size is bigger then page size, then it assumes
+ * that pages are grouped in subsets of physically continuous pages big
+ * enough to store at least one object.
+ *
+ * @param elt_num
+ *   Number of elements.
+ * @param elt_sz
+ *   The size of each element.
+ * @param pg_shift
+ *   LOG2 of the physical pages size.
+ * @return
+ *   Required memory size aligned at page boundary.
+ */
+size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz,
+	uint32_t pg_shift);
+
+/**
+ * Get the size of memory required to store mempool elements.
+ *
+ * Calculate how much memory would be actually required with the given
+ * memory footprint to store required number of objects.
+ *
+ * @param vaddr
+ *   Virtual address of the externally allocated memory buffer.
+ *   Will be used to store mempool objects.
+ * @param elt_num
+ *   Number of elements.
+ * @param elt_sz
+ *   The size of each element.
+ * @param paddr
+ *   Array of physical addresses of the pages that comprises given memory
+ *   buffer.
+ * @param pg_num
+ *   Number of elements in the paddr array.
+ * @param pg_shift
+ *   LOG2 of the physical pages size.
+ * @return
+ *   On success, the number of bytes needed to store given number of
+ *   objects, aligned to the given page size. If the provided memory
+ *   buffer is too small, return a negative value whose absolute value
+ *   is the actual number of elements that can be stored in that buffer.
+ */
+ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
+
+/**
+ * Walk list of all memory pools
+ *
+ * @param func
+ *   Iterator function
+ * @param arg
+ *   Argument passed to iterator
+ */
+void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *arg),
+		      void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMPOOL_H_ */
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
new file mode 100644
index 00000000..17151e08
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -0,0 +1,19 @@
+DPDK_2.0 {
+	global:
+
+	rte_dom0_mempool_create;
+	rte_mempool_audit;
+	rte_mempool_calc_obj_size;
+	rte_mempool_count;
+	rte_mempool_create;
+	rte_mempool_dump;
+	rte_mempool_list_dump;
+	rte_mempool_lookup;
+	rte_mempool_obj_iter;
+	rte_mempool_walk;
+	rte_mempool_xmem_create;
+	rte_mempool_xmem_size;
+	rte_mempool_xmem_usage;
+
+	local: *;
+};
diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile
new file mode 100644
index 00000000..f07fced7
--- /dev/null
+++ b/lib/librte_meter/Makefile
@@ -0,0 +1,59 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_meter.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lm
+
+EXPORT_MAP := rte_meter_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_METER) := rte_meter.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_METER)-include := rte_meter.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_METER) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_meter/rte_meter.c b/lib/librte_meter/rte_meter.c
new file mode 100644
index 00000000..5e2dadb8
--- /dev/null
+++ b/lib/librte_meter/rte_meter.c
@@ -0,0 +1,120 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+
+#include "rte_meter.h"
+
+#ifndef RTE_METER_TB_PERIOD_MIN
+#define RTE_METER_TB_PERIOD_MIN      100
+#endif
+
+static void
+rte_meter_get_tb_params(uint64_t hz, uint64_t rate, uint64_t *tb_period, uint64_t *tb_bytes_per_period)
+{
+	double period = ((double) hz) / ((double) rate);
+
+	if (period >= RTE_METER_TB_PERIOD_MIN) {
+		*tb_bytes_per_period = 1;
+		*tb_period = (uint64_t) period;
+	} else {
+		*tb_bytes_per_period = (uint64_t) ceil(RTE_METER_TB_PERIOD_MIN / period);
+		*tb_period = (hz * (*tb_bytes_per_period)) / rate;
+	}
+}
+
+int
+rte_meter_srtcm_config(struct rte_meter_srtcm *m, struct rte_meter_srtcm_params *params)
+{
+	uint64_t hz;
+
+	/* Check input parameters */
+	if ((m == NULL) || (params == NULL)) {
+		return -1;
+	}
+
+	if ((params->cir == 0) || ((params->cbs == 0) && (params->ebs == 0))) {
+		return -2;
+	}
+
+	/* Initialize srTCM run-time structure */
+	hz = rte_get_tsc_hz();
+	m->time = rte_get_tsc_cycles();
+	m->tc = m->cbs = params->cbs;
+	m->te = m->ebs = params->ebs;
+	rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period);
+
+	RTE_LOG(INFO, METER, "Low level srTCM config: \n"
+		"\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n",
+		m->cir_period, m->cir_bytes_per_period);
+
+	return 0;
+}
+
+int
+rte_meter_trtcm_config(struct rte_meter_trtcm *m, struct rte_meter_trtcm_params *params)
+{
+	uint64_t hz;
+
+	/* Check input parameters */
+	if ((m == NULL) || (params == NULL)) {
+		return -1;
+	}
+
+	if ((params->cir == 0) || (params->pir == 0) || (params->pir < params->cir) ||
+		(params->cbs == 0) || (params->pbs == 0)) {
+		return -2;
+	}
+
+	/* Initialize trTCM run-time structure */
+	hz = rte_get_tsc_hz();
+	m->time_tc = m->time_tp = rte_get_tsc_cycles();
+	m->tc = m->cbs = params->cbs;
+	m->tp = m->pbs = params->pbs;
+	rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period);
+	rte_meter_get_tb_params(hz, params->pir, &m->pir_period, &m->pir_bytes_per_period);
+
+	RTE_LOG(INFO, METER, "Low level trTCM config: \n"
+		"\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n"
+		"\tPIR period = %" PRIu64 ", PIR bytes per period = %" PRIu64 "\n",
+		m->cir_period, m->cir_bytes_per_period,
+		m->pir_period, m->pir_bytes_per_period);
+
+	return 0;
+}
diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h
new file mode 100644
index 00000000..2cd8d814
--- /dev/null
+++ b/lib/librte_meter/rte_meter.h
@@ -0,0 +1,387 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_METER_H__
+#define __INCLUDE_RTE_METER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Traffic Metering
+ *
+ * Traffic metering algorithms:
+ *    1. Single Rate Three Color Marker (srTCM): defined by IETF RFC 2697
+ *    2. Two Rate Three Color Marker (trTCM): defined by IETF RFC 2698
+ *
+ ***/
+
+#include <stdint.h>
+
+/*
+ * Application Programmer's Interface (API)
+ *
+ ***/
+
+/** Packet Color Set */
+enum rte_meter_color {
+	e_RTE_METER_GREEN = 0, /**< Green */
+	e_RTE_METER_YELLOW,    /**< Yellow */
+	e_RTE_METER_RED,       /**< Red */
+	e_RTE_METER_COLORS     /**< Number of available colors */
+};
+
+/** srTCM parameters per metered traffic flow. The CIR, CBS and EBS parameters only
+count bytes of IP packets and do not include link specific headers. At least one of
+the CBS or EBS parameters has to be greater than zero. */
+struct rte_meter_srtcm_params {
+	uint64_t cir; /**< Committed Information Rate (CIR). Measured in bytes per second. */
+	uint64_t cbs; /**< Committed Burst Size (CBS).  Measured in bytes. */
+	uint64_t ebs; /**< Excess Burst Size (EBS).  Measured in bytes. */
+};
+
+/** trTCM parameters per metered traffic flow. The CIR, PIR, CBS and PBS parameters
+only count bytes of IP packets and do not include link specific headers. PIR has to
+be greater than or equal to CIR. Both CBS or EBS have to be greater than zero. */
+struct rte_meter_trtcm_params {
+	uint64_t cir; /**< Committed Information Rate (CIR). Measured in bytes per second. */
+	uint64_t pir; /**< Peak Information Rate (PIR). Measured in bytes per second. */
+	uint64_t cbs; /**< Committed Burst Size (CBS). Measured in byes. */
+	uint64_t pbs; /**< Peak Burst Size (PBS). Measured in bytes. */
+};
+
+/** Internal data structure storing the srTCM run-time context per metered traffic flow. */
+struct rte_meter_srtcm;
+
+/** Internal data structure storing the trTCM run-time context per metered traffic flow. */
+struct rte_meter_trtcm;
+
+/**
+ * srTCM configuration per metered traffic flow
+ *
+ * @param m
+ *    Pointer to pre-allocated srTCM data structure
+ * @param params
+ *    User parameters per srTCM metered traffic flow
+ * @return
+ *    0 upon success, error code otherwise
+ */
+int
+rte_meter_srtcm_config(struct rte_meter_srtcm *m,
+	struct rte_meter_srtcm_params *params);
+
+/**
+ * trTCM configuration per metered traffic flow
+ *
+ * @param m
+ *    Pointer to pre-allocated trTCM data structure
+ * @param params
+ *    User parameters per trTCM metered traffic flow
+ * @return
+ *    0 upon success, error code otherwise
+ */
+int
+rte_meter_trtcm_config(struct rte_meter_trtcm *m,
+	struct rte_meter_trtcm_params *params);
+
+/**
+ * srTCM color blind traffic metering
+ *
+ * @param m
+ *    Handle to srTCM instance
+ * @param time
+ *    Current CPU time stamp (measured in CPU cycles)
+ * @param pkt_len
+ *    Length of the current IP packet (measured in bytes)
+ * @return
+ *    Color assigned to the current IP packet
+ */
+static inline enum rte_meter_color
+rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
+	uint64_t time,
+	uint32_t pkt_len);
+
+/**
+ * srTCM color aware traffic metering
+ *
+ * @param m
+ *    Handle to srTCM instance
+ * @param time
+ *    Current CPU time stamp (measured in CPU cycles)
+ * @param pkt_len
+ *    Length of the current IP packet (measured in bytes)
+ * @param pkt_color
+ *    Input color of the current IP packet
+ * @return
+ *    Color assigned to the current IP packet
+ */
+static inline enum rte_meter_color
+rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
+	uint64_t time,
+	uint32_t pkt_len,
+	enum rte_meter_color pkt_color);
+
+/**
+ * trTCM color blind traffic metering
+ *
+ * @param m
+ *    Handle to trTCM instance
+ * @param time
+ *    Current CPU time stamp (measured in CPU cycles)
+ * @param pkt_len
+ *    Length of the current IP packet (measured in bytes)
+ * @return
+ *    Color assigned to the current IP packet
+ */
+static inline enum rte_meter_color
+rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
+	uint64_t time,
+	uint32_t pkt_len);
+
+/**
+ * trTCM color aware traffic metering
+ *
+ * @param m
+ *    Handle to trTCM instance
+ * @param time
+ *    Current CPU time stamp (measured in CPU cycles)
+ * @param pkt_len
+ *    Length of the current IP packet (measured in bytes)
+ * @param pkt_color
+ *    Input color of the current IP packet
+ * @return
+ *    Color assigned to the current IP packet
+ */
+static inline enum rte_meter_color
+rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m,
+	uint64_t time,
+	uint32_t pkt_len,
+	enum rte_meter_color pkt_color);
+
+/*
+ * Inline implementation of run-time methods
+ *
+ ***/
+
+/* Internal data structure storing the srTCM run-time context per metered traffic flow. */
+struct rte_meter_srtcm {
+	uint64_t time; /* Time of latest update of C and E token buckets */
+	uint64_t tc;   /* Number of bytes currently available in the committed (C) token bucket */
+	uint64_t te;   /* Number of bytes currently available in the excess (E) token bucket */
+	uint64_t cbs;  /* Upper limit for C token bucket */
+	uint64_t ebs;  /* Upper limit for E token bucket */
+	uint64_t cir_period; /* Number of CPU cycles for one update of C and E token buckets */
+	uint64_t cir_bytes_per_period; /* Number of bytes to add to C and E token buckets on each update */
+};
+
+/* Internal data structure storing the trTCM run-time context per metered traffic flow. */
+struct rte_meter_trtcm {
+	uint64_t time_tc; /* Time of latest update of C token bucket */
+	uint64_t time_tp; /* Time of latest update of E token bucket */
+	uint64_t tc;      /* Number of bytes currently available in the committed (C) token bucket */
+	uint64_t tp;      /* Number of bytes currently available in the peak (P) token bucket */
+	uint64_t cbs;     /* Upper limit for C token bucket */
+	uint64_t pbs;     /* Upper limit for P token bucket */
+	uint64_t cir_period; /* Number of CPU cycles for one update of C token bucket */
+	uint64_t cir_bytes_per_period; /* Number of bytes to add to C token bucket on each update */
+	uint64_t pir_period; /* Number of CPU cycles for one update of P token bucket */
+	uint64_t pir_bytes_per_period; /* Number of bytes to add to P token bucket on each update */
+};
+
+static inline enum rte_meter_color
+rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
+	uint64_t time,
+	uint32_t pkt_len)
+{
+	uint64_t time_diff, n_periods, tc, te;
+
+	/* Bucket update */
+	time_diff = time - m->time;
+	n_periods = time_diff / m->cir_period;
+	m->time += n_periods * m->cir_period;
+
+	tc = m->tc + n_periods * m->cir_bytes_per_period;
+	if (tc > m->cbs)
+		tc = m->cbs;
+
+	te = m->te + n_periods * m->cir_bytes_per_period;
+	if (te > m->ebs)
+		te = m->ebs;
+
+	/* Color logic */
+	if (tc >= pkt_len) {
+		m->tc = tc - pkt_len;
+		m->te = te;
+		return e_RTE_METER_GREEN;
+	}
+
+	if (te >= pkt_len) {
+		m->tc = tc;
+		m->te = te - pkt_len;
+		return e_RTE_METER_YELLOW;
+	}
+
+	m->tc = tc;
+	m->te = te;
+	return e_RTE_METER_RED;
+}
+
+static inline enum rte_meter_color
+rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
+	uint64_t time,
+	uint32_t pkt_len,
+	enum rte_meter_color pkt_color)
+{
+	uint64_t time_diff, n_periods, tc, te;
+
+	/* Bucket update */
+	time_diff = time - m->time;
+	n_periods = time_diff / m->cir_period;
+	m->time += n_periods * m->cir_period;
+
+	tc = m->tc + n_periods * m->cir_bytes_per_period;
+	if (tc > m->cbs)
+		tc = m->cbs;
+
+	te = m->te + n_periods * m->cir_bytes_per_period;
+	if (te > m->ebs)
+		te = m->ebs;
+
+	/* Color logic */
+	if ((pkt_color == e_RTE_METER_GREEN) && (tc >= pkt_len)) {
+		m->tc = tc - pkt_len;
+		m->te = te;
+		return e_RTE_METER_GREEN;
+	}
+
+	if ((pkt_color != e_RTE_METER_RED) && (te >= pkt_len)) {
+		m->tc = tc;
+		m->te = te - pkt_len;
+		return e_RTE_METER_YELLOW;
+	}
+
+	m->tc = tc;
+	m->te = te;
+	return e_RTE_METER_RED;
+}
+
+static inline enum rte_meter_color
+rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m,
+	uint64_t time,
+	uint32_t pkt_len)
+{
+	uint64_t time_diff_tc, time_diff_tp, n_periods_tc, n_periods_tp, tc, tp;
+
+	/* Bucket update */
+	time_diff_tc = time - m->time_tc;
+	time_diff_tp = time - m->time_tp;
+	n_periods_tc = time_diff_tc / m->cir_period;
+	n_periods_tp = time_diff_tp / m->pir_period;
+	m->time_tc += n_periods_tc * m->cir_period;
+	m->time_tp += n_periods_tp * m->pir_period;
+
+	tc = m->tc + n_periods_tc * m->cir_bytes_per_period;
+	if (tc > m->cbs)
+		tc = m->cbs;
+
+	tp = m->tp + n_periods_tp * m->pir_bytes_per_period;
+	if (tp > m->pbs)
+		tp = m->pbs;
+
+	/* Color logic */
+	if (tp < pkt_len) {
+		m->tc = tc;
+		m->tp = tp;
+		return e_RTE_METER_RED;
+	}
+
+	if (tc < pkt_len) {
+		m->tc = tc;
+		m->tp = tp - pkt_len;
+		return e_RTE_METER_YELLOW;
+	}
+
+	m->tc = tc - pkt_len;
+	m->tp = tp - pkt_len;
+	return e_RTE_METER_GREEN;
+}
+
+static inline enum rte_meter_color
+rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m,
+	uint64_t time,
+	uint32_t pkt_len,
+	enum rte_meter_color pkt_color)
+{
+	uint64_t time_diff_tc, time_diff_tp, n_periods_tc, n_periods_tp, tc, tp;
+
+	/* Bucket update */
+	time_diff_tc = time - m->time_tc;
+	time_diff_tp = time - m->time_tp;
+	n_periods_tc = time_diff_tc / m->cir_period;
+	n_periods_tp = time_diff_tp / m->pir_period;
+	m->time_tc += n_periods_tc * m->cir_period;
+	m->time_tp += n_periods_tp * m->pir_period;
+
+	tc = m->tc + n_periods_tc * m->cir_bytes_per_period;
+	if (tc > m->cbs)
+		tc = m->cbs;
+
+	tp = m->tp + n_periods_tp * m->pir_bytes_per_period;
+	if (tp > m->pbs)
+		tp = m->pbs;
+
+	/* Color logic */
+	if ((pkt_color == e_RTE_METER_RED) || (tp < pkt_len)) {
+		m->tc = tc;
+		m->tp = tp;
+		return e_RTE_METER_RED;
+	}
+
+	if ((pkt_color == e_RTE_METER_YELLOW) || (tc < pkt_len)) {
+		m->tc = tc;
+		m->tp = tp - pkt_len;
+		return e_RTE_METER_YELLOW;
+	}
+
+	m->tc = tc - pkt_len;
+	m->tp = tp - pkt_len;
+	return e_RTE_METER_GREEN;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_METER_H__ */
diff --git a/lib/librte_meter/rte_meter_version.map b/lib/librte_meter/rte_meter_version.map
new file mode 100644
index 00000000..2fd647c6
--- /dev/null
+++ b/lib/librte_meter/rte_meter_version.map
@@ -0,0 +1,12 @@
+DPDK_2.0 {
+	global:
+
+	rte_meter_srtcm_color_aware_check;
+	rte_meter_srtcm_color_blind_check;
+	rte_meter_srtcm_config;
+	rte_meter_trtcm_color_aware_check;
+	rte_meter_trtcm_color_blind_check;
+	rte_meter_trtcm_config;
+
+	local: *;
+};
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
new file mode 100644
index 00000000..ad2e482d
--- /dev/null
+++ b/lib/librte_net/Makefile
@@ -0,0 +1,40 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
+
+
+include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h
new file mode 100644
index 00000000..18364187
--- /dev/null
+++ b/lib/librte_net/rte_arp.h
@@ -0,0 +1,83 @@
+/*   BSD LICENSE
+ *
+ *   Copyright(c) 2013 6WIND.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ARP_H_
+#define _RTE_ARP_H_
+
+/**
+ * @file
+ *
+ * ARP-related defines
+ */
+
+#include <stdint.h>
+#include <rte_ether.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * ARP header IPv4 payload.
+ */
+struct arp_ipv4 {
+	struct ether_addr arp_sha;  /**< sender hardware address */
+	uint32_t          arp_sip;  /**< sender IP address */
+	struct ether_addr arp_tha;  /**< target hardware address */
+	uint32_t          arp_tip;  /**< target IP address */
+} __attribute__((__packed__));
+
+/**
+ * ARP header.
+ */
+struct arp_hdr {
+	uint16_t arp_hrd;    /* format of hardware address */
+#define ARP_HRD_ETHER     1  /* ARP Ethernet address format */
+
+	uint16_t arp_pro;    /* format of protocol address */
+	uint8_t  arp_hln;    /* length of hardware address */
+	uint8_t  arp_pln;    /* length of protocol address */
+	uint16_t arp_op;     /* ARP opcode (command) */
+#define	ARP_OP_REQUEST    1 /* request to resolve address */
+#define	ARP_OP_REPLY      2 /* response to previous request */
+#define	ARP_OP_REVREQUEST 3 /* request proto addr given hardware */
+#define	ARP_OP_REVREPLY   4 /* response giving protocol address */
+#define	ARP_OP_INVREQUEST 8 /* request to identify peer */
+#define	ARP_OP_INVREPLY   9 /* response identifying peer */
+
+	struct arp_ipv4 arp_data;
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ARP_H_ */
diff --git a/lib/librte_net/rte_icmp.h b/lib/librte_net/rte_icmp.h
new file mode 100644
index 00000000..8b287f6d
--- /dev/null
+++ b/lib/librte_net/rte_icmp.h
@@ -0,0 +1,101 @@
+/*   BSD LICENSE
+ *
+ *   Copyright(c) 2013 6WIND.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)in.h        8.3 (Berkeley) 1/3/94
+ * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $
+ */
+
+#ifndef _RTE_ICMP_H_
+#define _RTE_ICMP_H_
+
+/**
+ * @file
+ *
+ * ICMP-related defines
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * ICMP Header
+ */
+struct icmp_hdr {
+	uint8_t  icmp_type;   /* ICMP packet type. */
+	uint8_t  icmp_code;   /* ICMP packet code. */
+	uint16_t icmp_cksum;  /* ICMP packet checksum. */
+	uint16_t icmp_ident;  /* ICMP packet identifier. */
+	uint16_t icmp_seq_nb; /* ICMP packet sequence number. */
+} __attribute__((__packed__));
+
+/* ICMP packet types */
+#define IP_ICMP_ECHO_REPLY   0
+#define IP_ICMP_ECHO_REQUEST 8
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_ICMP_H_ */
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
new file mode 100644
index 00000000..5b7554ab
--- /dev/null
+++ b/lib/librte_net/rte_ip.h
@@ -0,0 +1,413 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2014 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)in.h        8.3 (Berkeley) 1/3/94
+ * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $
+ */
+
+#ifndef _RTE_IP_H_
+#define _RTE_IP_H_
+
+/**
+ * @file
+ *
+ * IP-related defines
+ */
+
+#include <stdint.h>
+#include <netinet/in.h>
+
+#include <rte_byteorder.h>
+#include <rte_mbuf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * IPv4 Header
+ */
+struct ipv4_hdr {
+	uint8_t  version_ihl;		/**< version and header length */
+	uint8_t  type_of_service;	/**< type of service */
+	uint16_t total_length;		/**< length of packet */
+	uint16_t packet_id;		/**< packet ID */
+	uint16_t fragment_offset;	/**< fragmentation offset */
+	uint8_t  time_to_live;		/**< time to live */
+	uint8_t  next_proto_id;		/**< protocol ID */
+	uint16_t hdr_checksum;		/**< header checksum */
+	uint32_t src_addr;		/**< source address */
+	uint32_t dst_addr;		/**< destination address */
+} __attribute__((__packed__));
+
+/** Create IPv4 address */
+#define IPv4(a,b,c,d) ((uint32_t)(((a) & 0xff) << 24) | \
+					   (((b) & 0xff) << 16) | \
+					   (((c) & 0xff) << 8)  | \
+					   ((d) & 0xff))
+
+/** Maximal IPv4 packet length (including a header) */
+#define IPV4_MAX_PKT_LEN        65535
+
+/** Internet header length mask for version_ihl field */
+#define IPV4_HDR_IHL_MASK	(0x0f)
+/**
+ * Internet header length field multiplier (IHL field specifies overall header
+ * length in number of 4-byte words)
+ */
+#define IPV4_IHL_MULTIPLIER	(4)
+
+/* Fragment Offset * Flags. */
+#define	IPV4_HDR_DF_SHIFT	14
+#define	IPV4_HDR_MF_SHIFT	13
+#define	IPV4_HDR_FO_SHIFT	3
+
+#define	IPV4_HDR_DF_FLAG	(1 << IPV4_HDR_DF_SHIFT)
+#define	IPV4_HDR_MF_FLAG	(1 << IPV4_HDR_MF_SHIFT)
+
+#define	IPV4_HDR_OFFSET_MASK	((1 << IPV4_HDR_MF_SHIFT) - 1)
+
+#define	IPV4_HDR_OFFSET_UNITS	8
+
+/*
+ * IPv4 address types
+ */
+#define IPV4_ANY              ((uint32_t)0x00000000) /**< 0.0.0.0 */
+#define IPV4_LOOPBACK         ((uint32_t)0x7f000001) /**< 127.0.0.1 */
+#define IPV4_BROADCAST        ((uint32_t)0xe0000000) /**< 224.0.0.0 */
+#define IPV4_ALLHOSTS_GROUP   ((uint32_t)0xe0000001) /**< 224.0.0.1 */
+#define IPV4_ALLRTRS_GROUP    ((uint32_t)0xe0000002) /**< 224.0.0.2 */
+#define IPV4_MAX_LOCAL_GROUP  ((uint32_t)0xe00000ff) /**< 224.0.0.255 */
+
+/*
+ * IPv4 Multicast-related macros
+ */
+#define IPV4_MIN_MCAST  IPv4(224, 0, 0, 0)          /**< Minimal IPv4-multicast address */
+#define IPV4_MAX_MCAST  IPv4(239, 255, 255, 255)    /**< Maximum IPv4 multicast address */
+
+#define IS_IPV4_MCAST(x) \
+	((x) >= IPV4_MIN_MCAST && (x) <= IPV4_MAX_MCAST) /**< check if IPv4 address is multicast */
+
+/**
+ * @internal Calculate a sum of all words in the buffer.
+ * Helper routine for the rte_raw_cksum().
+ *
+ * @param buf
+ *   Pointer to the buffer.
+ * @param len
+ *   Length of the buffer.
+ * @param sum
+ *   Initial value of the sum.
+ * @return
+ *   sum += Sum of all words in the buffer.
+ */
+static inline uint32_t
+__rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
+{
+	/* workaround gcc strict-aliasing warning */
+	uintptr_t ptr = (uintptr_t)buf;
+	typedef uint16_t __attribute__((__may_alias__)) u16_p;
+	const u16_p *u16 = (const u16_p *)ptr;
+
+	while (len >= (sizeof(*u16) * 4)) {
+		sum += u16[0];
+		sum += u16[1];
+		sum += u16[2];
+		sum += u16[3];
+		len -= sizeof(*u16) * 4;
+		u16 += 4;
+	}
+	while (len >= sizeof(*u16)) {
+		sum += *u16;
+		len -= sizeof(*u16);
+		u16 += 1;
+	}
+
+	/* if length is in odd bytes */
+	if (len == 1)
+		sum += *((const uint8_t *)u16);
+
+	return sum;
+}
+
+/**
+ * @internal Reduce a sum to the non-complemented checksum.
+ * Helper routine for the rte_raw_cksum().
+ *
+ * @param sum
+ *   Value of the sum.
+ * @return
+ *   The non-complemented checksum.
+ */
+static inline uint16_t
+__rte_raw_cksum_reduce(uint32_t sum)
+{
+	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
+	sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff);
+	return (uint16_t)sum;
+}
+
+/**
+ * Process the non-complemented checksum of a buffer.
+ *
+ * @param buf
+ *   Pointer to the buffer.
+ * @param len
+ *   Length of the buffer.
+ * @return
+ *   The non-complemented checksum.
+ */
+static inline uint16_t
+rte_raw_cksum(const void *buf, size_t len)
+{
+	uint32_t sum;
+
+	sum = __rte_raw_cksum(buf, len, 0);
+	return __rte_raw_cksum_reduce(sum);
+}
+
+/**
+ * Process the IPv4 checksum of an IPv4 header.
+ *
+ * The checksum field must be set to 0 by the caller.
+ *
+ * @param ipv4_hdr
+ *   The pointer to the contiguous IPv4 header.
+ * @return
+ *   The complemented checksum to set in the IP packet.
+ */
+static inline uint16_t
+rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr)
+{
+	uint16_t cksum;
+	cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr));
+	return (cksum == 0xffff) ? cksum : ~cksum;
+}
+
+/**
+ * Process the pseudo-header checksum of an IPv4 header.
+ *
+ * The checksum field must be set to 0 by the caller.
+ *
+ * Depending on the ol_flags, the pseudo-header checksum expected by the
+ * drivers is not the same. For instance, when TSO is enabled, the IP
+ * payload length must not be included in the packet.
+ *
+ * When ol_flags is 0, it computes the standard pseudo-header checksum.
+ *
+ * @param ipv4_hdr
+ *   The pointer to the contiguous IPv4 header.
+ * @param ol_flags
+ *   The ol_flags of the associated mbuf.
+ * @return
+ *   The non-complemented checksum to set in the L4 header.
+ */
+static inline uint16_t
+rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
+{
+	struct ipv4_psd_header {
+		uint32_t src_addr; /* IP address of source host. */
+		uint32_t dst_addr; /* IP address of destination host. */
+		uint8_t  zero;     /* zero. */
+		uint8_t  proto;    /* L4 protocol type. */
+		uint16_t len;      /* L4 length. */
+	} psd_hdr;
+
+	psd_hdr.src_addr = ipv4_hdr->src_addr;
+	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
+	psd_hdr.zero = 0;
+	psd_hdr.proto = ipv4_hdr->next_proto_id;
+	if (ol_flags & PKT_TX_TCP_SEG) {
+		psd_hdr.len = 0;
+	} else {
+		psd_hdr.len = rte_cpu_to_be_16(
+			(uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length)
+				- sizeof(struct ipv4_hdr)));
+	}
+	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
+}
+
+/**
+ * Process the IPv4 UDP or TCP checksum.
+ *
+ * The IPv4 header should not contains options. The IP and layer 4
+ * checksum must be set to 0 in the packet by the caller.
+ *
+ * @param ipv4_hdr
+ *   The pointer to the contiguous IPv4 header.
+ * @param l4_hdr
+ *   The pointer to the beginning of the L4 header.
+ * @return
+ *   The complemented checksum to set in the IP packet.
+ */
+static inline uint16_t
+rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr)
+{
+	uint32_t cksum;
+	uint32_t l4_len;
+
+	l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) -
+		sizeof(struct ipv4_hdr);
+
+	cksum = rte_raw_cksum(l4_hdr, l4_len);
+	cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
+
+	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+	cksum = (~cksum) & 0xffff;
+	if (cksum == 0)
+		cksum = 0xffff;
+
+	return cksum;
+}
+
+/**
+ * IPv6 Header
+ */
+struct ipv6_hdr {
+	uint32_t vtc_flow;     /**< IP version, traffic class & flow label. */
+	uint16_t payload_len;  /**< IP packet length - includes sizeof(ip_header). */
+	uint8_t  proto;        /**< Protocol, next header. */
+	uint8_t  hop_limits;   /**< Hop limits. */
+	uint8_t  src_addr[16]; /**< IP address of source host. */
+	uint8_t  dst_addr[16]; /**< IP address of destination host(s). */
+} __attribute__((__packed__));
+
+/**
+ * Process the pseudo-header checksum of an IPv6 header.
+ *
+ * Depending on the ol_flags, the pseudo-header checksum expected by the
+ * drivers is not the same. For instance, when TSO is enabled, the IPv6
+ * payload length must not be included in the packet.
+ *
+ * When ol_flags is 0, it computes the standard pseudo-header checksum.
+ *
+ * @param ipv6_hdr
+ *   The pointer to the contiguous IPv6 header.
+ * @param ol_flags
+ *   The ol_flags of the associated mbuf.
+ * @return
+ *   The non-complemented checksum to set in the L4 header.
+ */
+static inline uint16_t
+rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
+{
+	uint32_t sum;
+	struct {
+		uint32_t len;   /* L4 length. */
+		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
+	} psd_hdr;
+
+	psd_hdr.proto = (ipv6_hdr->proto << 24);
+	if (ol_flags & PKT_TX_TCP_SEG) {
+		psd_hdr.len = 0;
+	} else {
+		psd_hdr.len = ipv6_hdr->payload_len;
+	}
+
+	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
+		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr),
+		0);
+	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
+	return __rte_raw_cksum_reduce(sum);
+}
+
+/**
+ * Process the IPv6 UDP or TCP checksum.
+ *
+ * The IPv4 header should not contains options. The layer 4 checksum
+ * must be set to 0 in the packet by the caller.
+ *
+ * @param ipv6_hdr
+ *   The pointer to the contiguous IPv6 header.
+ * @param l4_hdr
+ *   The pointer to the beginning of the L4 header.
+ * @return
+ *   The complemented checksum to set in the IP packet.
+ */
+static inline uint16_t
+rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr)
+{
+	uint32_t cksum;
+	uint32_t l4_len;
+
+	l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
+
+	cksum = rte_raw_cksum(l4_hdr, l4_len);
+	cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0);
+
+	cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
+	cksum = (~cksum) & 0xffff;
+	if (cksum == 0)
+		cksum = 0xffff;
+
+	return cksum;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IP_H_ */
diff --git a/lib/librte_net/rte_sctp.h b/lib/librte_net/rte_sctp.h
new file mode 100644
index 00000000..688e126f
--- /dev/null
+++ b/lib/librte_net/rte_sctp.h
@@ -0,0 +1,99 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)in.h        8.3 (Berkeley) 1/3/94
+ * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $
+ */
+
+/**
+ * @file
+ *
+ * SCTP-related defines
+ */
+
+#ifndef _RTE_SCTP_H_
+#define _RTE_SCTP_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/**
+ * SCTP Header
+ */
+struct sctp_hdr {
+	uint16_t src_port; /**< Source port. */
+	uint16_t dst_port; /**< Destin port. */
+	uint32_t tag;      /**< Validation tag. */
+	uint32_t cksum;    /**< Checksum. */
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_SCTP_H_ */
diff --git a/lib/librte_net/rte_tcp.h b/lib/librte_net/rte_tcp.h
new file mode 100644
index 00000000..28b61e6d
--- /dev/null
+++ b/lib/librte_net/rte_tcp.h
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)in.h        8.3 (Berkeley) 1/3/94
+ * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $
+ */
+
+#ifndef _RTE_TCP_H_
+#define _RTE_TCP_H_
+
+/**
+ * @file
+ *
+ * TCP-related defines
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * TCP Header
+ */
+struct tcp_hdr {
+	uint16_t src_port;  /**< TCP source port. */
+	uint16_t dst_port;  /**< TCP destination port. */
+	uint32_t sent_seq;  /**< TX data sequence number. */
+	uint32_t recv_ack;  /**< RX data acknowledgement sequence number. */
+	uint8_t  data_off;  /**< Data offset. */
+	uint8_t  tcp_flags; /**< TCP flags */
+	uint16_t rx_win;    /**< RX flow control window. */
+	uint16_t cksum;     /**< TCP checksum. */
+	uint16_t tcp_urp;   /**< TCP urgent pointer, if any. */
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_TCP_H_ */
diff --git a/lib/librte_net/rte_udp.h b/lib/librte_net/rte_udp.h
new file mode 100644
index 00000000..bc5be4af
--- /dev/null
+++ b/lib/librte_net/rte_udp.h
@@ -0,0 +1,99 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by the University of
+ *      California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)in.h        8.3 (Berkeley) 1/3/94
+ * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $
+ */
+
+#ifndef _RTE_UDP_H_
+#define _RTE_UDP_H_
+
+/**
+ * @file
+ *
+ * UDP-related defines
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * UDP Header
+ */
+struct udp_hdr {
+	uint16_t src_port;    /**< UDP source port. */
+	uint16_t dst_port;    /**< UDP destination port. */
+	uint16_t dgram_len;   /**< UDP datagram length */
+	uint16_t dgram_cksum; /**< UDP datagram checksum */
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UDP_H_ */
diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile
new file mode 100644
index 00000000..822fd41c
--- /dev/null
+++ b/lib/librte_pipeline/Makefile
@@ -0,0 +1,58 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pipeline.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pipeline_version.map
+
+LIBABIVER := 3
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := rte_pipeline.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) := lib/librte_table
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_port
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pipeline/rte_pipeline.c b/lib/librte_pipeline/rte_pipeline.c
new file mode 100644
index 00000000..7f8fbac5
--- /dev/null
+++ b/lib/librte_pipeline/rte_pipeline.c
@@ -0,0 +1,1649 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+
+#include "rte_pipeline.h"
+
+#define RTE_TABLE_INVALID                                 UINT32_MAX
+
+#ifdef RTE_PIPELINE_STATS_COLLECT
+
+#define RTE_PIPELINE_STATS_AH_DROP_WRITE(p, mask)			\
+	({ (p)->n_pkts_ah_drop = __builtin_popcountll(mask); })
+
+#define RTE_PIPELINE_STATS_AH_DROP_READ(p, counter)			\
+	({ (counter) += (p)->n_pkts_ah_drop; (p)->n_pkts_ah_drop = 0; })
+
+#define RTE_PIPELINE_STATS_TABLE_DROP0(p)				\
+	({ (p)->pkts_drop_mask = (p)->action_mask0[RTE_PIPELINE_ACTION_DROP]; })
+
+#define RTE_PIPELINE_STATS_TABLE_DROP1(p, counter)			\
+({									\
+	uint64_t mask = (p)->action_mask0[RTE_PIPELINE_ACTION_DROP];	\
+	mask ^= (p)->pkts_drop_mask;					\
+	(counter) += __builtin_popcountll(mask);			\
+})
+
+#else
+
+#define RTE_PIPELINE_STATS_AH_DROP_WRITE(p, mask)
+#define RTE_PIPELINE_STATS_AH_DROP_READ(p, counter)
+#define RTE_PIPELINE_STATS_TABLE_DROP0(p)
+#define RTE_PIPELINE_STATS_TABLE_DROP1(p, counter)
+
+#endif
+
+struct rte_port_in {
+	/* Input parameters */
+	struct rte_port_in_ops ops;
+	rte_pipeline_port_in_action_handler f_action;
+	void *arg_ah;
+	uint32_t burst_size;
+
+	/* The table to which this port is connected */
+	uint32_t table_id;
+
+	/* Handle to low-level port */
+	void *h_port;
+
+	/* List of enabled ports */
+	struct rte_port_in *next;
+
+	/* Statistics */
+	uint64_t n_pkts_dropped_by_ah;
+};
+
+struct rte_port_out {
+	/* Input parameters */
+	struct rte_port_out_ops ops;
+	rte_pipeline_port_out_action_handler f_action;
+	void *arg_ah;
+
+	/* Handle to low-level port */
+	void *h_port;
+
+	/* Statistics */
+	uint64_t n_pkts_dropped_by_ah;
+};
+
+struct rte_table {
+	/* Input parameters */
+	struct rte_table_ops ops;
+	rte_pipeline_table_action_handler_hit f_action_hit;
+	rte_pipeline_table_action_handler_miss f_action_miss;
+	void *arg_ah;
+	struct rte_pipeline_table_entry *default_entry;
+	uint32_t entry_size;
+
+	uint32_t table_next_id;
+	uint32_t table_next_id_valid;
+
+	/* Handle to the low-level table object */
+	void *h_table;
+
+	/* Statistics */
+	uint64_t n_pkts_dropped_by_lkp_hit_ah;
+	uint64_t n_pkts_dropped_by_lkp_miss_ah;
+	uint64_t n_pkts_dropped_lkp_hit;
+	uint64_t n_pkts_dropped_lkp_miss;
+};
+
+#define RTE_PIPELINE_MAX_NAME_SZ                           124
+
+struct rte_pipeline {
+	/* Input parameters */
+	char name[RTE_PIPELINE_MAX_NAME_SZ];
+	int socket_id;
+	uint32_t offset_port_id;
+
+	/* Internal tables */
+	struct rte_port_in ports_in[RTE_PIPELINE_PORT_IN_MAX];
+	struct rte_port_out ports_out[RTE_PIPELINE_PORT_OUT_MAX];
+	struct rte_table tables[RTE_PIPELINE_TABLE_MAX];
+
+	/* Occupancy of internal tables */
+	uint32_t num_ports_in;
+	uint32_t num_ports_out;
+	uint32_t num_tables;
+
+	/* List of enabled ports */
+	uint64_t enabled_port_in_mask;
+	struct rte_port_in *port_in_next;
+
+	/* Pipeline run structures */
+	struct rte_mbuf *pkts[RTE_PORT_IN_BURST_SIZE_MAX];
+	struct rte_pipeline_table_entry *entries[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t action_mask0[RTE_PIPELINE_ACTIONS];
+	uint64_t action_mask1[RTE_PIPELINE_ACTIONS];
+	uint64_t pkts_mask;
+	uint64_t n_pkts_ah_drop;
+	uint64_t pkts_drop_mask;
+} __rte_cache_aligned;
+
+static inline uint32_t
+rte_mask_get_next(uint64_t mask, uint32_t pos)
+{
+	uint64_t mask_rot = (mask << ((63 - pos) & 0x3F)) |
+			(mask >> ((pos + 1) & 0x3F));
+	return (__builtin_ctzll(mask_rot) - (63 - pos)) & 0x3F;
+}
+
+static inline uint32_t
+rte_mask_get_prev(uint64_t mask, uint32_t pos)
+{
+	uint64_t mask_rot = (mask >> (pos & 0x3F)) |
+			(mask << ((64 - pos) & 0x3F));
+	return ((63 - __builtin_clzll(mask_rot)) + pos) & 0x3F;
+}
+
+static void
+rte_pipeline_table_free(struct rte_table *table);
+
+static void
+rte_pipeline_port_in_free(struct rte_port_in *port);
+
+static void
+rte_pipeline_port_out_free(struct rte_port_out *port);
+
+/*
+ * Pipeline
+ *
+ */
+static int
+rte_pipeline_check_params(struct rte_pipeline_params *params)
+{
+	if (params == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Incorrect value for parameter params\n", __func__);
+		return -EINVAL;
+	}
+
+	/* name */
+	if (params->name == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Incorrect value for parameter name\n", __func__);
+		return -EINVAL;
+	}
+
+	/* socket */
+	if ((params->socket_id < 0) ||
+	    (params->socket_id >= RTE_MAX_NUMA_NODES)) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Incorrect value for parameter socket_id\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct rte_pipeline *
+rte_pipeline_create(struct rte_pipeline_params *params)
+{
+	struct rte_pipeline *p;
+	int status;
+
+	/* Check input parameters */
+	status = rte_pipeline_check_params(params);
+	if (status != 0) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Pipeline params check failed (%d)\n",
+			__func__, status);
+		return NULL;
+	}
+
+	/* Allocate memory for the pipeline on requested socket */
+	p = rte_zmalloc_socket("PIPELINE", sizeof(struct rte_pipeline),
+			RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Pipeline memory allocation failed\n", __func__);
+		return NULL;
+	}
+
+	/* Save input parameters */
+	snprintf(p->name, RTE_PIPELINE_MAX_NAME_SZ, "%s", params->name);
+	p->socket_id = params->socket_id;
+	p->offset_port_id = params->offset_port_id;
+
+	/* Initialize pipeline internal data structure */
+	p->num_ports_in = 0;
+	p->num_ports_out = 0;
+	p->num_tables = 0;
+	p->enabled_port_in_mask = 0;
+	p->port_in_next = NULL;
+	p->pkts_mask = 0;
+	p->n_pkts_ah_drop = 0;
+
+	return p;
+}
+
+int
+rte_pipeline_free(struct rte_pipeline *p)
+{
+	uint32_t i;
+
+	/* Check input parameters */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: rte_pipeline parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Free input ports */
+	for (i = 0; i < p->num_ports_in; i++) {
+		struct rte_port_in *port = &p->ports_in[i];
+
+		rte_pipeline_port_in_free(port);
+	}
+
+	/* Free tables */
+	for (i = 0; i < p->num_tables; i++) {
+		struct rte_table *table = &p->tables[i];
+
+		rte_pipeline_table_free(table);
+	}
+
+	/* Free output ports */
+	for (i = 0; i < p->num_ports_out; i++) {
+		struct rte_port_out *port = &p->ports_out[i];
+
+		rte_pipeline_port_out_free(port);
+	}
+
+	/* Free pipeline memory */
+	rte_free(p);
+
+	return 0;
+}
+
+/*
+ * Table
+ *
+ */
+static int
+rte_table_check_params(struct rte_pipeline *p,
+		struct rte_pipeline_table_params *params,
+		uint32_t *table_id)
+{
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (params == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: params parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (table_id == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: table_id parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* ops */
+	if (params->ops == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: params->ops is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_create == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_create function pointer is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_lookup == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_lookup function pointer is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* De we have room for one more table? */
+	if (p->num_tables == RTE_PIPELINE_TABLE_MAX) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Incorrect value for num_tables parameter\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int
+rte_pipeline_table_create(struct rte_pipeline *p,
+		struct rte_pipeline_table_params *params,
+		uint32_t *table_id)
+{
+	struct rte_table *table;
+	struct rte_pipeline_table_entry *default_entry;
+	void *h_table;
+	uint32_t entry_size, id;
+	int status;
+
+	/* Check input arguments */
+	status = rte_table_check_params(p, params, table_id);
+	if (status != 0)
+		return status;
+
+	id = p->num_tables;
+	table = &p->tables[id];
+
+	/* Allocate space for the default table entry */
+	entry_size = sizeof(struct rte_pipeline_table_entry) +
+		params->action_data_size;
+	default_entry = (struct rte_pipeline_table_entry *) rte_zmalloc_socket(
+		"PIPELINE", entry_size, RTE_CACHE_LINE_SIZE, p->socket_id);
+	if (default_entry == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Failed to allocate default entry\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Create the table */
+	h_table = params->ops->f_create(params->arg_create, p->socket_id,
+		entry_size);
+	if (h_table == NULL) {
+		rte_free(default_entry);
+		RTE_LOG(ERR, PIPELINE, "%s: Table creation failed\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Commit current table to the pipeline */
+	p->num_tables++;
+	*table_id = id;
+
+	/* Save input parameters */
+	memcpy(&table->ops, params->ops, sizeof(struct rte_table_ops));
+	table->f_action_hit = params->f_action_hit;
+	table->f_action_miss = params->f_action_miss;
+	table->arg_ah = params->arg_ah;
+	table->entry_size = entry_size;
+
+	/* Clear the lookup miss actions (to be set later through API) */
+	table->default_entry = default_entry;
+	table->default_entry->action = RTE_PIPELINE_ACTION_DROP;
+
+	/* Initialize table internal data structure */
+	table->h_table = h_table;
+	table->table_next_id = 0;
+	table->table_next_id_valid = 0;
+
+	return 0;
+}
+
+void
+rte_pipeline_table_free(struct rte_table *table)
+{
+	if (table->ops.f_free != NULL)
+		table->ops.f_free(table->h_table);
+
+	rte_free(table->default_entry);
+}
+
+int
+rte_pipeline_table_default_entry_add(struct rte_pipeline *p,
+	uint32_t table_id,
+	struct rte_pipeline_table_entry *default_entry,
+	struct rte_pipeline_table_entry **default_entry_ptr)
+{
+	struct rte_table *table;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (default_entry == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: default_entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: table_id %d out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+
+	if ((default_entry->action == RTE_PIPELINE_ACTION_TABLE) &&
+		table->table_next_id_valid &&
+		(default_entry->table_id != table->table_next_id)) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Tree-like topologies not allowed\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Set the lookup miss actions */
+	if ((default_entry->action == RTE_PIPELINE_ACTION_TABLE) &&
+		(table->table_next_id_valid == 0)) {
+		table->table_next_id = default_entry->table_id;
+		table->table_next_id_valid = 1;
+	}
+
+	memcpy(table->default_entry, default_entry, table->entry_size);
+
+	*default_entry_ptr = table->default_entry;
+	return 0;
+}
+
+int
+rte_pipeline_table_default_entry_delete(struct rte_pipeline *p,
+		uint32_t table_id,
+		struct rte_pipeline_table_entry *entry)
+{
+	struct rte_table *table;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: pipeline parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: table_id %d out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+
+	/* Save the current contents of the default entry */
+	if (entry)
+		memcpy(entry, table->default_entry, table->entry_size);
+
+	/* Clear the lookup miss actions */
+	memset(table->default_entry, 0, table->entry_size);
+	table->default_entry->action = RTE_PIPELINE_ACTION_DROP;
+
+	return 0;
+}
+
+int
+rte_pipeline_table_entry_add(struct rte_pipeline *p,
+		uint32_t table_id,
+		void *key,
+		struct rte_pipeline_table_entry *entry,
+		int *key_found,
+		struct rte_pipeline_table_entry **entry_ptr)
+{
+	struct rte_table *table;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (key == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (entry == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: entry parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: table_id %d out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+
+	if (table->ops.f_add == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: f_add function pointer NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if ((entry->action == RTE_PIPELINE_ACTION_TABLE) &&
+		table->table_next_id_valid &&
+		(entry->table_id != table->table_next_id)) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Tree-like topologies not allowed\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Add entry */
+	if ((entry->action == RTE_PIPELINE_ACTION_TABLE) &&
+		(table->table_next_id_valid == 0)) {
+		table->table_next_id = entry->table_id;
+		table->table_next_id_valid = 1;
+	}
+
+	return (table->ops.f_add)(table->h_table, key, (void *) entry,
+		key_found, (void **) entry_ptr);
+}
+
+int
+rte_pipeline_table_entry_delete(struct rte_pipeline *p,
+		uint32_t table_id,
+		void *key,
+		int *key_found,
+		struct rte_pipeline_table_entry *entry)
+{
+	struct rte_table *table;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (key == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: table_id %d out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+
+	if (table->ops.f_delete == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_delete function pointer NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	return (table->ops.f_delete)(table->h_table, key, key_found, entry);
+}
+
+int rte_pipeline_table_entry_add_bulk(struct rte_pipeline *p,
+	uint32_t table_id,
+	void **keys,
+	struct rte_pipeline_table_entry **entries,
+	uint32_t n_keys,
+	int *key_found,
+	struct rte_pipeline_table_entry **entries_ptr)
+{
+	struct rte_table *table;
+	uint32_t i;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (keys == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: keys parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (entries == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: entries parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: table_id %d out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+
+	if (table->ops.f_add_bulk == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: f_add_bulk function pointer NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < n_keys; i++) {
+		if ((entries[i]->action == RTE_PIPELINE_ACTION_TABLE) &&
+			table->table_next_id_valid &&
+			(entries[i]->table_id != table->table_next_id)) {
+			RTE_LOG(ERR, PIPELINE,
+				"%s: Tree-like topologies not allowed\n", __func__);
+			return -EINVAL;
+		}
+	}
+
+	/* Add entry */
+	for (i = 0; i < n_keys; i++) {
+		if ((entries[i]->action == RTE_PIPELINE_ACTION_TABLE) &&
+			(table->table_next_id_valid == 0)) {
+			table->table_next_id = entries[i]->table_id;
+			table->table_next_id_valid = 1;
+		}
+	}
+
+	return (table->ops.f_add_bulk)(table->h_table, keys, (void **) entries,
+		n_keys, key_found, (void **) entries_ptr);
+}
+
+int rte_pipeline_table_entry_delete_bulk(struct rte_pipeline *p,
+	uint32_t table_id,
+	void **keys,
+	uint32_t n_keys,
+	int *key_found,
+	struct rte_pipeline_table_entry **entries)
+{
+	struct rte_table *table;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (keys == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: table_id %d out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+
+	if (table->ops.f_delete_bulk == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_delete function pointer NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	return (table->ops.f_delete_bulk)(table->h_table, keys, n_keys, key_found,
+			(void **) entries);
+}
+
+/*
+ * Port
+ *
+ */
+static int
+rte_pipeline_port_in_check_params(struct rte_pipeline *p,
+		struct rte_pipeline_port_in_params *params,
+		uint32_t *port_id)
+{
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (params == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: params parameter NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (port_id == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: port_id parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* ops */
+	if (params->ops == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: params->ops parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_create == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_create function pointer NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_rx == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: f_rx function pointer NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* burst_size */
+	if ((params->burst_size == 0) ||
+		(params->burst_size > RTE_PORT_IN_BURST_SIZE_MAX)) {
+		RTE_LOG(ERR, PIPELINE, "%s: invalid value for burst_size\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Do we have room for one more port? */
+	if (p->num_ports_in == RTE_PIPELINE_PORT_IN_MAX) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: invalid value for num_ports_in\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+rte_pipeline_port_out_check_params(struct rte_pipeline *p,
+		struct rte_pipeline_port_out_params *params,
+		uint32_t *port_id)
+{
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (params == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: params parameter NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (port_id == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: port_id parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* ops */
+	if (params->ops == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: params->ops parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_create == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_create function pointer NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_tx == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_tx function pointer NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (params->ops->f_tx_bulk == NULL) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: f_tx_bulk function pointer NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Do we have room for one more port? */
+	if (p->num_ports_out == RTE_PIPELINE_PORT_OUT_MAX) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: invalid value for num_ports_out\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int
+rte_pipeline_port_in_create(struct rte_pipeline *p,
+		struct rte_pipeline_port_in_params *params,
+		uint32_t *port_id)
+{
+	struct rte_port_in *port;
+	void *h_port;
+	uint32_t id;
+	int status;
+
+	/* Check input arguments */
+	status = rte_pipeline_port_in_check_params(p, params, port_id);
+	if (status != 0)
+		return status;
+
+	id = p->num_ports_in;
+	port = &p->ports_in[id];
+
+	/* Create the port */
+	h_port = params->ops->f_create(params->arg_create, p->socket_id);
+	if (h_port == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: Port creation failed\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Commit current table to the pipeline */
+	p->num_ports_in++;
+	*port_id = id;
+
+	/* Save input parameters */
+	memcpy(&port->ops, params->ops, sizeof(struct rte_port_in_ops));
+	port->f_action = params->f_action;
+	port->arg_ah = params->arg_ah;
+	port->burst_size = params->burst_size;
+
+	/* Initialize port internal data structure */
+	port->table_id = RTE_TABLE_INVALID;
+	port->h_port = h_port;
+	port->next = NULL;
+
+	return 0;
+}
+
+void
+rte_pipeline_port_in_free(struct rte_port_in *port)
+{
+	if (port->ops.f_free != NULL)
+		port->ops.f_free(port->h_port);
+}
+
+int
+rte_pipeline_port_out_create(struct rte_pipeline *p,
+		struct rte_pipeline_port_out_params *params,
+		uint32_t *port_id)
+{
+	struct rte_port_out *port;
+	void *h_port;
+	uint32_t id;
+	int status;
+
+	/* Check input arguments */
+	status = rte_pipeline_port_out_check_params(p, params, port_id);
+	if (status != 0)
+		return status;
+
+	id = p->num_ports_out;
+	port = &p->ports_out[id];
+
+	/* Create the port */
+	h_port = params->ops->f_create(params->arg_create, p->socket_id);
+	if (h_port == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: Port creation failed\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Commit current table to the pipeline */
+	p->num_ports_out++;
+	*port_id = id;
+
+	/* Save input parameters */
+	memcpy(&port->ops, params->ops, sizeof(struct rte_port_out_ops));
+	port->f_action = params->f_action;
+	port->arg_ah = params->arg_ah;
+
+	/* Initialize port internal data structure */
+	port->h_port = h_port;
+
+	return 0;
+}
+
+void
+rte_pipeline_port_out_free(struct rte_port_out *port)
+{
+	if (port->ops.f_free != NULL)
+		port->ops.f_free(port->h_port);
+}
+
+int
+rte_pipeline_port_in_connect_to_table(struct rte_pipeline *p,
+		uint32_t port_id,
+		uint32_t table_id)
+{
+	struct rte_port_in *port;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_in) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: port IN ID %u is out of range\n",
+			__func__, port_id);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: Table ID %u is out of range\n",
+			__func__, table_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_in[port_id];
+	port->table_id = table_id;
+
+	return 0;
+}
+
+int
+rte_pipeline_port_in_enable(struct rte_pipeline *p, uint32_t port_id)
+{
+	struct rte_port_in *port, *port_prev, *port_next;
+	uint64_t port_mask;
+	uint32_t port_prev_id, port_next_id;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_in) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: port IN ID %u is out of range\n",
+			__func__, port_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_in[port_id];
+
+	/* Return if current input port is already enabled */
+	port_mask = 1LLU << port_id;
+	if (p->enabled_port_in_mask & port_mask)
+		return 0;
+
+	p->enabled_port_in_mask |= port_mask;
+
+	/* Add current input port to the pipeline chain of enabled ports */
+	port_prev_id = rte_mask_get_prev(p->enabled_port_in_mask, port_id);
+	port_next_id = rte_mask_get_next(p->enabled_port_in_mask, port_id);
+
+	port_prev = &p->ports_in[port_prev_id];
+	port_next = &p->ports_in[port_next_id];
+
+	port_prev->next = port;
+	port->next = port_next;
+
+	/* Check if list of enabled ports was previously empty */
+	if (p->enabled_port_in_mask == port_mask)
+		p->port_in_next = port;
+
+	return 0;
+}
+
+int
+rte_pipeline_port_in_disable(struct rte_pipeline *p, uint32_t port_id)
+{
+	struct rte_port_in *port, *port_prev, *port_next;
+	uint64_t port_mask;
+	uint32_t port_prev_id, port_next_id;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+		__func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_in) {
+		RTE_LOG(ERR, PIPELINE, "%s: port IN ID %u is out of range\n",
+			__func__, port_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_in[port_id];
+
+	/* Return if current input port is already disabled */
+	port_mask = 1LLU << port_id;
+	if ((p->enabled_port_in_mask & port_mask) == 0)
+		return 0;
+
+	p->enabled_port_in_mask &= ~port_mask;
+
+	/* Return if no other enabled ports */
+	if (p->enabled_port_in_mask == 0) {
+		p->port_in_next = NULL;
+
+		return 0;
+	}
+
+	/* Add current input port to the pipeline chain of enabled ports */
+	port_prev_id = rte_mask_get_prev(p->enabled_port_in_mask, port_id);
+	port_next_id = rte_mask_get_next(p->enabled_port_in_mask, port_id);
+
+	port_prev = &p->ports_in[port_prev_id];
+	port_next = &p->ports_in[port_next_id];
+
+	port_prev->next = port_next;
+
+	/* Check if the port which has just been disabled is next to serve */
+	if (port == p->port_in_next)
+		p->port_in_next = port_next;
+
+	return 0;
+}
+
+/*
+ * Pipeline run-time
+ *
+ */
+int
+rte_pipeline_check(struct rte_pipeline *p)
+{
+	uint32_t port_in_id;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Check that pipeline has at least one input port, one table and one
+	output port */
+	if (p->num_ports_in == 0) {
+		RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 input port\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (p->num_tables == 0) {
+		RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 table\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (p->num_ports_out == 0) {
+		RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 output port\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Check that all input ports are connected */
+	for (port_in_id = 0; port_in_id < p->num_ports_in; port_in_id++) {
+		struct rte_port_in *port_in = &p->ports_in[port_in_id];
+
+		if (port_in->table_id == RTE_TABLE_INVALID) {
+			RTE_LOG(ERR, PIPELINE,
+				"%s: Port IN ID %u is not connected\n",
+				__func__, port_in_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static inline void
+rte_pipeline_compute_masks(struct rte_pipeline *p, uint64_t pkts_mask)
+{
+	p->action_mask1[RTE_PIPELINE_ACTION_DROP] = 0;
+	p->action_mask1[RTE_PIPELINE_ACTION_PORT] = 0;
+	p->action_mask1[RTE_PIPELINE_ACTION_PORT_META] = 0;
+	p->action_mask1[RTE_PIPELINE_ACTION_TABLE] = 0;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++) {
+			uint64_t pkt_mask = 1LLU << i;
+			uint32_t pos = p->entries[i]->action;
+
+			p->action_mask1[pos] |= pkt_mask;
+		}
+	} else {
+		uint32_t i;
+
+		for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) {
+			uint64_t pkt_mask = 1LLU << i;
+			uint32_t pos;
+
+			if ((pkt_mask & pkts_mask) == 0)
+				continue;
+
+			pos = p->entries[i]->action;
+			p->action_mask1[pos] |= pkt_mask;
+		}
+	}
+}
+
+static inline void
+rte_pipeline_action_handler_port_bulk(struct rte_pipeline *p,
+	uint64_t pkts_mask, uint32_t port_id)
+{
+	struct rte_port_out *port_out = &p->ports_out[port_id];
+
+	p->pkts_mask = pkts_mask;
+
+	/* Output port user actions */
+	if (port_out->f_action != NULL) {
+		port_out->f_action(p, p->pkts, pkts_mask, port_out->arg_ah);
+
+		RTE_PIPELINE_STATS_AH_DROP_READ(p,
+			port_out->n_pkts_dropped_by_ah);
+	}
+
+	/* Output port TX */
+	if (p->pkts_mask != 0)
+		port_out->ops.f_tx_bulk(port_out->h_port,
+			p->pkts,
+			p->pkts_mask);
+}
+
+static inline void
+rte_pipeline_action_handler_port(struct rte_pipeline *p, uint64_t pkts_mask)
+{
+	p->pkts_mask = pkts_mask;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++) {
+			struct rte_mbuf *pkt = p->pkts[i];
+			uint32_t port_out_id = p->entries[i]->port_id;
+			struct rte_port_out *port_out =
+				&p->ports_out[port_out_id];
+
+			/* Output port user actions */
+			if (port_out->f_action == NULL) /* Output port TX */
+				port_out->ops.f_tx(port_out->h_port, pkt);
+			else {
+				uint64_t pkt_mask = 1LLU << i;
+
+				port_out->f_action(p,
+					p->pkts,
+					pkt_mask,
+					port_out->arg_ah);
+
+				RTE_PIPELINE_STATS_AH_DROP_READ(p,
+					port_out->n_pkts_dropped_by_ah);
+
+				/* Output port TX */
+				if (pkt_mask & p->pkts_mask)
+					port_out->ops.f_tx(port_out->h_port,
+						pkt);
+			}
+		}
+	} else {
+		uint32_t i;
+
+		for (i = 0;  i < RTE_PORT_IN_BURST_SIZE_MAX; i++) {
+			uint64_t pkt_mask = 1LLU << i;
+			struct rte_mbuf *pkt;
+			struct rte_port_out *port_out;
+			uint32_t port_out_id;
+
+			if ((pkt_mask & pkts_mask) == 0)
+				continue;
+
+			pkt = p->pkts[i];
+			port_out_id = p->entries[i]->port_id;
+			port_out = &p->ports_out[port_out_id];
+
+			/* Output port user actions */
+			if (port_out->f_action == NULL) /* Output port TX */
+				port_out->ops.f_tx(port_out->h_port, pkt);
+			else {
+				port_out->f_action(p,
+					p->pkts,
+					pkt_mask,
+					port_out->arg_ah);
+
+				RTE_PIPELINE_STATS_AH_DROP_READ(p,
+					port_out->n_pkts_dropped_by_ah);
+
+				/* Output port TX */
+				if (pkt_mask & p->pkts_mask)
+					port_out->ops.f_tx(port_out->h_port,
+						pkt);
+			}
+		}
+	}
+}
+
+static inline void
+rte_pipeline_action_handler_port_meta(struct rte_pipeline *p,
+	uint64_t pkts_mask)
+{
+	p->pkts_mask = pkts_mask;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++) {
+			struct rte_mbuf *pkt = p->pkts[i];
+			uint32_t port_out_id =
+				RTE_MBUF_METADATA_UINT32(pkt,
+					p->offset_port_id);
+			struct rte_port_out *port_out = &p->ports_out[
+				port_out_id];
+
+			/* Output port user actions */
+			if (port_out->f_action == NULL) /* Output port TX */
+				port_out->ops.f_tx(port_out->h_port, pkt);
+			else {
+				uint64_t pkt_mask = 1LLU << i;
+
+				port_out->f_action(p,
+					p->pkts,
+					pkt_mask,
+					port_out->arg_ah);
+
+				RTE_PIPELINE_STATS_AH_DROP_READ(p,
+					port_out->n_pkts_dropped_by_ah);
+
+				/* Output port TX */
+				if (pkt_mask & p->pkts_mask)
+					port_out->ops.f_tx(port_out->h_port,
+						pkt);
+			}
+		}
+	} else {
+		uint32_t i;
+
+		for (i = 0;  i < RTE_PORT_IN_BURST_SIZE_MAX; i++) {
+			uint64_t pkt_mask = 1LLU << i;
+			struct rte_mbuf *pkt;
+			struct rte_port_out *port_out;
+			uint32_t port_out_id;
+
+			if ((pkt_mask & pkts_mask) == 0)
+				continue;
+
+			pkt = p->pkts[i];
+			port_out_id = RTE_MBUF_METADATA_UINT32(pkt,
+				p->offset_port_id);
+			port_out = &p->ports_out[port_out_id];
+
+			/* Output port user actions */
+			if (port_out->f_action == NULL) /* Output port TX */
+				port_out->ops.f_tx(port_out->h_port, pkt);
+			else {
+				port_out->f_action(p,
+					p->pkts,
+					pkt_mask,
+					port_out->arg_ah);
+
+				RTE_PIPELINE_STATS_AH_DROP_READ(p,
+					port_out->n_pkts_dropped_by_ah);
+
+				/* Output port TX */
+				if (pkt_mask & p->pkts_mask)
+					port_out->ops.f_tx(port_out->h_port,
+						pkt);
+			}
+		}
+	}
+}
+
+static inline void
+rte_pipeline_action_handler_drop(struct rte_pipeline *p, uint64_t pkts_mask)
+{
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++)
+			rte_pktmbuf_free(p->pkts[i]);
+	} else {
+		uint32_t i;
+
+		for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) {
+			uint64_t pkt_mask = 1LLU << i;
+
+			if ((pkt_mask & pkts_mask) == 0)
+				continue;
+
+			rte_pktmbuf_free(p->pkts[i]);
+		}
+	}
+}
+
+int
+rte_pipeline_run(struct rte_pipeline *p)
+{
+	struct rte_port_in *port_in = p->port_in_next;
+	uint32_t n_pkts, table_id;
+
+	if (port_in == NULL)
+		return 0;
+
+	/* Input port RX */
+	n_pkts = port_in->ops.f_rx(port_in->h_port, p->pkts,
+		port_in->burst_size);
+	if (n_pkts == 0) {
+		p->port_in_next = port_in->next;
+		return 0;
+	}
+
+	p->pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
+	p->action_mask0[RTE_PIPELINE_ACTION_DROP] = 0;
+	p->action_mask0[RTE_PIPELINE_ACTION_PORT] = 0;
+	p->action_mask0[RTE_PIPELINE_ACTION_PORT_META] = 0;
+	p->action_mask0[RTE_PIPELINE_ACTION_TABLE] = 0;
+
+	/* Input port user actions */
+	if (port_in->f_action != NULL) {
+		port_in->f_action(p, p->pkts, n_pkts, port_in->arg_ah);
+
+		RTE_PIPELINE_STATS_AH_DROP_READ(p,
+			port_in->n_pkts_dropped_by_ah);
+	}
+
+	/* Table */
+	for (table_id = port_in->table_id; p->pkts_mask != 0; ) {
+		struct rte_table *table;
+		uint64_t lookup_hit_mask, lookup_miss_mask;
+
+		/* Lookup */
+		table = &p->tables[table_id];
+		table->ops.f_lookup(table->h_table, p->pkts, p->pkts_mask,
+			&lookup_hit_mask, (void **) p->entries);
+		lookup_miss_mask = p->pkts_mask & (~lookup_hit_mask);
+
+		/* Lookup miss */
+		if (lookup_miss_mask != 0) {
+			struct rte_pipeline_table_entry *default_entry =
+				table->default_entry;
+
+			p->pkts_mask = lookup_miss_mask;
+
+			/* Table user actions */
+			if (table->f_action_miss != NULL) {
+				table->f_action_miss(p,
+					p->pkts,
+					lookup_miss_mask,
+					default_entry,
+					table->arg_ah);
+
+				RTE_PIPELINE_STATS_AH_DROP_READ(p,
+					table->n_pkts_dropped_by_lkp_miss_ah);
+			}
+
+			/* Table reserved actions */
+			if ((default_entry->action == RTE_PIPELINE_ACTION_PORT) &&
+				(p->pkts_mask != 0))
+				rte_pipeline_action_handler_port_bulk(p,
+					p->pkts_mask,
+					default_entry->port_id);
+			else {
+				uint32_t pos = default_entry->action;
+
+				RTE_PIPELINE_STATS_TABLE_DROP0(p);
+
+				p->action_mask0[pos] |= p->pkts_mask;
+
+				RTE_PIPELINE_STATS_TABLE_DROP1(p,
+					table->n_pkts_dropped_lkp_miss);
+			}
+		}
+
+		/* Lookup hit */
+		if (lookup_hit_mask != 0) {
+			p->pkts_mask = lookup_hit_mask;
+
+			/* Table user actions */
+			if (table->f_action_hit != NULL) {
+				table->f_action_hit(p,
+					p->pkts,
+					lookup_hit_mask,
+					p->entries,
+					table->arg_ah);
+
+				RTE_PIPELINE_STATS_AH_DROP_READ(p,
+					table->n_pkts_dropped_by_lkp_hit_ah);
+			}
+
+			/* Table reserved actions */
+			RTE_PIPELINE_STATS_TABLE_DROP0(p);
+			rte_pipeline_compute_masks(p, p->pkts_mask);
+			p->action_mask0[RTE_PIPELINE_ACTION_DROP] |=
+				p->action_mask1[
+					RTE_PIPELINE_ACTION_DROP];
+			p->action_mask0[RTE_PIPELINE_ACTION_PORT] |=
+				p->action_mask1[
+					RTE_PIPELINE_ACTION_PORT];
+			p->action_mask0[RTE_PIPELINE_ACTION_PORT_META] |=
+				p->action_mask1[
+					RTE_PIPELINE_ACTION_PORT_META];
+			p->action_mask0[RTE_PIPELINE_ACTION_TABLE] |=
+				p->action_mask1[
+					RTE_PIPELINE_ACTION_TABLE];
+
+			RTE_PIPELINE_STATS_TABLE_DROP1(p,
+				table->n_pkts_dropped_lkp_hit);
+		}
+
+		/* Prepare for next iteration */
+		p->pkts_mask = p->action_mask0[RTE_PIPELINE_ACTION_TABLE];
+		table_id = table->table_next_id;
+		p->action_mask0[RTE_PIPELINE_ACTION_TABLE] = 0;
+	}
+
+	/* Table reserved action PORT */
+	rte_pipeline_action_handler_port(p,
+		p->action_mask0[RTE_PIPELINE_ACTION_PORT]);
+
+	/* Table reserved action PORT META */
+	rte_pipeline_action_handler_port_meta(p,
+		p->action_mask0[RTE_PIPELINE_ACTION_PORT_META]);
+
+	/* Table reserved action DROP */
+	rte_pipeline_action_handler_drop(p,
+		p->action_mask0[RTE_PIPELINE_ACTION_DROP]);
+
+	/* Pick candidate for next port IN to serve */
+	p->port_in_next = port_in->next;
+
+	return (int) n_pkts;
+}
+
+int
+rte_pipeline_flush(struct rte_pipeline *p)
+{
+	uint32_t port_id;
+
+	/* Check input arguments */
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	for (port_id = 0; port_id < p->num_ports_out; port_id++) {
+		struct rte_port_out *port = &p->ports_out[port_id];
+
+		if (port->ops.f_flush != NULL)
+			port->ops.f_flush(port->h_port);
+	}
+
+	return 0;
+}
+
+int
+rte_pipeline_port_out_packet_insert(struct rte_pipeline *p,
+	uint32_t port_id, struct rte_mbuf *pkt)
+{
+	struct rte_port_out *port_out = &p->ports_out[port_id];
+
+	port_out->ops.f_tx(port_out->h_port, pkt); /* Output port TX */
+
+	return 0;
+}
+
+int rte_pipeline_ah_packet_hijack(struct rte_pipeline *p,
+	uint64_t pkts_mask)
+{
+	pkts_mask &= p->pkts_mask;
+	p->pkts_mask &= ~pkts_mask;
+
+	return 0;
+}
+
+int rte_pipeline_ah_packet_drop(struct rte_pipeline *p,
+	uint64_t pkts_mask)
+{
+	pkts_mask &= p->pkts_mask;
+	p->pkts_mask &= ~pkts_mask;
+	p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= pkts_mask;
+
+	RTE_PIPELINE_STATS_AH_DROP_WRITE(p, pkts_mask);
+	return 0;
+}
+
+int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_in_stats *stats, int clear)
+{
+	struct rte_port_in *port;
+	int retval;
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_in) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: port IN ID %u is out of range\n",
+			__func__, port_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_in[port_id];
+
+	if (port->ops.f_stats != NULL) {
+		retval = port->ops.f_stats(port->h_port, &stats->stats, clear);
+		if (retval)
+			return retval;
+	} else if (stats != NULL)
+		memset(&stats->stats, 0, sizeof(stats->stats));
+
+	if (stats != NULL)
+		stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah;
+
+	if (clear != 0)
+		port->n_pkts_dropped_by_ah = 0;
+
+	return 0;
+}
+
+int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_out_stats *stats, int clear)
+{
+	struct rte_port_out *port;
+	int retval;
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (port_id >= p->num_ports_out) {
+		RTE_LOG(ERR, PIPELINE,
+			"%s: port OUT ID %u is out of range\n", __func__, port_id);
+		return -EINVAL;
+	}
+
+	port = &p->ports_out[port_id];
+	if (port->ops.f_stats != NULL) {
+		retval = port->ops.f_stats(port->h_port, &stats->stats, clear);
+		if (retval != 0)
+			return retval;
+	} else if (stats != NULL)
+		memset(&stats->stats, 0, sizeof(stats->stats));
+
+	if (stats != NULL)
+		stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah;
+
+	if (clear != 0)
+		port->n_pkts_dropped_by_ah = 0;
+
+	return 0;
+}
+
+int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id,
+	struct rte_pipeline_table_stats *stats, int clear)
+{
+	struct rte_table *table;
+	int retval;
+
+	if (p == NULL) {
+		RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (table_id >= p->num_tables) {
+		RTE_LOG(ERR, PIPELINE,
+				"%s: table %u is out of range\n", __func__, table_id);
+		return -EINVAL;
+	}
+
+	table = &p->tables[table_id];
+	if (table->ops.f_stats != NULL) {
+		retval = table->ops.f_stats(table->h_table, &stats->stats, clear);
+		if (retval != 0)
+			return retval;
+	} else if (stats != NULL)
+		memset(&stats->stats, 0, sizeof(stats->stats));
+
+	if (stats != NULL) {
+		stats->n_pkts_dropped_by_lkp_hit_ah =
+			table->n_pkts_dropped_by_lkp_hit_ah;
+		stats->n_pkts_dropped_by_lkp_miss_ah =
+			table->n_pkts_dropped_by_lkp_miss_ah;
+		stats->n_pkts_dropped_lkp_hit = table->n_pkts_dropped_lkp_hit;
+		stats->n_pkts_dropped_lkp_miss = table->n_pkts_dropped_lkp_miss;
+	}
+
+	if (clear != 0) {
+		table->n_pkts_dropped_by_lkp_hit_ah = 0;
+		table->n_pkts_dropped_by_lkp_miss_ah = 0;
+		table->n_pkts_dropped_lkp_hit = 0;
+		table->n_pkts_dropped_lkp_miss = 0;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_pipeline/rte_pipeline.h b/lib/librte_pipeline/rte_pipeline.h
new file mode 100644
index 00000000..84d18025
--- /dev/null
+++ b/lib/librte_pipeline/rte_pipeline.h
@@ -0,0 +1,875 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PIPELINE_H__
+#define __INCLUDE_RTE_PIPELINE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Pipeline
+ *
+ * This tool is part of the DPDK Packet Framework tool suite and provides
+ * a standard methodology (logically similar to OpenFlow) for rapid development
+ * of complex packet processing pipelines out of ports, tables and actions.
+ *
+ * <B>Basic operation.</B> A pipeline is constructed by connecting its input
+ * ports to its output ports through a chain of lookup tables. As result of
+ * lookup operation into the current table, one of the table entries (or the
+ * default table entry, in case of lookup miss) is identified to provide the
+ * actions to be executed on the current packet and the associated action
+ * meta-data. The behavior of user actions is defined through the configurable
+ * table action handler, while the reserved actions define the next hop for the
+ * current packet (either another table, an output port or packet drop) and are
+ * handled transparently by the framework.
+ *
+ * <B>Initialization and run-time flows.</B> Once all the pipeline elements
+ * (input ports, tables, output ports) have been created, input ports connected
+ * to tables, table action handlers configured, tables populated with the
+ * initial set of entries (actions and action meta-data) and input ports
+ * enabled, the pipeline runs automatically, pushing packets from input ports
+ * to tables and output ports. At each table, the identified user actions are
+ * being executed, resulting in action meta-data (stored in the table entry)
+ * and packet meta-data (stored with the packet descriptor) being updated. The
+ * pipeline tables can have further updates and input ports can be disabled or
+ * enabled later on as required.
+ *
+ * <B>Multi-core scaling.</B> Typically, each CPU core will run its own
+ * pipeline instance. Complex application-level pipelines can be implemented by
+ * interconnecting multiple CPU core-level pipelines in tree-like topologies,
+ * as the same port devices (e.g. SW rings) can serve as output ports for the
+ * pipeline running on CPU core A, as well as input ports for the pipeline
+ * running on CPU core B. This approach enables the application development
+ * using the pipeline (CPU cores connected serially), cluster/run-to-completion
+ * (CPU cores connected in parallel) or mixed (pipeline of CPU core clusters)
+ * programming models.
+ *
+ * <B>Thread safety.</B> It is possible to have multiple pipelines running on
+ * the same CPU core, but it is not allowed (for thread safety reasons) to have
+ * multiple CPU cores running the same pipeline instance.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_port.h>
+#include <rte_table.h>
+
+struct rte_mbuf;
+
+/*
+ * Pipeline
+ *
+ */
+/** Opaque data type for pipeline */
+struct rte_pipeline;
+
+/** Parameters for pipeline creation  */
+struct rte_pipeline_params {
+	/** Pipeline name */
+	const char *name;
+
+	/** CPU socket ID where memory for the pipeline and its elements (ports
+	and tables) should be allocated */
+	int socket_id;
+
+	/** Offset within packet meta-data to port_id to be used by action
+	"Send packet to output port read from packet meta-data". Has to be
+	4-byte aligned. */
+	uint32_t offset_port_id;
+};
+
+/** Pipeline port in stats. */
+struct rte_pipeline_port_in_stats {
+	/** Port in stats. */
+	struct rte_port_in_stats stats;
+
+	/** Number of packets dropped by action handler. */
+	uint64_t n_pkts_dropped_by_ah;
+
+};
+
+/** Pipeline port out stats. */
+struct rte_pipeline_port_out_stats {
+	/** Port out stats. */
+	struct rte_port_out_stats stats;
+
+	/** Number of packets dropped by action handler. */
+	uint64_t n_pkts_dropped_by_ah;
+};
+
+/** Pipeline table stats. */
+struct rte_pipeline_table_stats {
+	/** Table stats. */
+	struct rte_table_stats stats;
+
+	/** Number of packets dropped by lookup hit action handler. */
+	uint64_t n_pkts_dropped_by_lkp_hit_ah;
+
+	/** Number of packets dropped by lookup miss action handler. */
+	uint64_t n_pkts_dropped_by_lkp_miss_ah;
+
+	/** Number of packets dropped by pipeline in behalf of this
+	 * table based on action specified in table entry. */
+	uint64_t n_pkts_dropped_lkp_hit;
+
+	/** Number of packets dropped by pipeline in behalf of this
+	 *  table based on action specified in table entry. */
+	uint64_t n_pkts_dropped_lkp_miss;
+};
+
+/**
+ * Pipeline create
+ *
+ * @param params
+ *   Parameters for pipeline creation
+ * @return
+ *   Handle to pipeline instance on success or NULL otherwise
+ */
+struct rte_pipeline *rte_pipeline_create(struct rte_pipeline_params *params);
+
+/**
+ * Pipeline free
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_free(struct rte_pipeline *p);
+
+/**
+ * Pipeline consistency check
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_check(struct rte_pipeline *p);
+
+/**
+ * Pipeline run
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @return
+ *   Number of packets read and processed
+ */
+int rte_pipeline_run(struct rte_pipeline *p);
+
+/**
+ * Pipeline flush
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_flush(struct rte_pipeline *p);
+
+/*
+ * Actions
+ *
+ */
+/** Reserved actions */
+enum rte_pipeline_action {
+	/** Drop the packet */
+	RTE_PIPELINE_ACTION_DROP = 0,
+
+	/** Send packet to output port */
+	RTE_PIPELINE_ACTION_PORT,
+
+	/** Send packet to output port read from packet meta-data */
+	RTE_PIPELINE_ACTION_PORT_META,
+
+	/** Send packet to table */
+	RTE_PIPELINE_ACTION_TABLE,
+
+	/** Number of reserved actions */
+	RTE_PIPELINE_ACTIONS
+};
+
+/*
+ * Table
+ *
+ */
+/** Maximum number of tables allowed for any given pipeline instance. The
+	value of this parameter cannot be changed. */
+#define RTE_PIPELINE_TABLE_MAX                                     64
+
+/**
+ * Head format for the table entry of any pipeline table. For any given
+ * pipeline table, all table entries should have the same size and format. For
+ * any given pipeline table, the table entry has to start with a head of this
+ * structure, which contains the reserved actions and their associated
+ * meta-data, and then optionally continues with user actions and their
+ * associated meta-data. As all the currently defined reserved actions are
+ * mutually exclusive, only one reserved action can be set per table entry.
+ */
+struct rte_pipeline_table_entry {
+	/** Reserved action */
+	enum rte_pipeline_action action;
+
+	union {
+		/** Output port ID (meta-data for "Send packet to output port"
+		action) */
+		uint32_t port_id;
+		/** Table ID (meta-data for "Send packet to table" action) */
+		uint32_t table_id;
+	};
+	/** Start of table entry area for user defined actions and meta-data */
+	uint8_t action_data[0];
+};
+
+/**
+ * Pipeline table action handler on lookup hit
+ *
+ * The action handler can decide to drop packets by resetting the associated
+ * packet bit in the pkts_mask parameter. In this case, the action handler is
+ * required not to free the packet buffer, which will be freed eventually by
+ * the pipeline.
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param pkts_mask
+ *   64-bit bitmask specifying which packets in the input burst are valid. When
+ *   pkts_mask bit n is set, then element n of pkts array is pointing to a
+ *   valid packet and element n of entries array is pointing to a valid table
+ *   entry associated with the packet, with the association typically done by
+ *   the table lookup operation. Otherwise, element n of pkts array and element
+ *   n of entries array will not be accessed.
+ * @param entries
+ *   Set of table entries specified as array of up to 64 pointers to struct
+ *   rte_pipeline_table_entry
+ * @param arg
+ *   Opaque parameter registered by the user at the pipeline table creation
+ *   time
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_pipeline_table_action_handler_hit)(
+	struct rte_pipeline *p,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	struct rte_pipeline_table_entry **entries,
+	void *arg);
+
+/**
+ * Pipeline table action handler on lookup miss
+ *
+ * The action handler can decide to drop packets by resetting the associated
+ * packet bit in the pkts_mask parameter. In this case, the action handler is
+ * required not to free the packet buffer, which will be freed eventually by
+ * the pipeline.
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param pkts_mask
+ *   64-bit bitmask specifying which packets in the input burst are valid. When
+ *   pkts_mask bit n is set, then element n of pkts array is pointing to a
+ *   valid packet. Otherwise, element n of pkts array will not be accessed.
+ * @param entry
+ *   Single table entry associated with all the valid packets from the input
+ *   burst, specified as pointer to struct rte_pipeline_table_entry.
+ *   This entry is the pipeline table default entry that is associated by the
+ *   table lookup operation with the input packets that have resulted in lookup
+ *   miss.
+ * @param arg
+ *   Opaque parameter registered by the user at the pipeline table creation
+ *   time
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_pipeline_table_action_handler_miss)(
+	struct rte_pipeline *p,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	struct rte_pipeline_table_entry *entry,
+	void *arg);
+
+/** Parameters for pipeline table creation. Action handlers have to be either
+    both enabled or both disabled (they can be disabled by setting them to
+    NULL). */
+struct rte_pipeline_table_params {
+	/** Table operations (specific to each table type) */
+	struct rte_table_ops *ops;
+	/** Opaque param to be passed to the table create operation when
+	invoked */
+	void *arg_create;
+	/** Callback function to execute the user actions on input packets in
+	case of lookup hit */
+	rte_pipeline_table_action_handler_hit f_action_hit;
+	/** Callback function to execute the user actions on input packets in
+	case of lookup miss */
+	rte_pipeline_table_action_handler_miss f_action_miss;
+
+	/** Opaque parameter to be passed to lookup hit and/or lookup miss
+	action handlers when invoked */
+	void *arg_ah;
+	/** Memory size to be reserved per table entry for storing the user
+	actions and their meta-data */
+	uint32_t action_data_size;
+};
+
+/**
+ * Pipeline table create
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param params
+ *   Parameters for pipeline table creation
+ * @param table_id
+ *   Table ID. Valid only within the scope of table IDs of the current
+ *   pipeline. Only returned after a successful invocation.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_create(struct rte_pipeline *p,
+	struct rte_pipeline_table_params *params,
+	uint32_t *table_id);
+
+/**
+ * Pipeline table default entry add
+ *
+ * The contents of the table default entry is updated with the provided actions
+ * and meta-data. When the default entry is not configured (by using this
+ * function), the built-in default entry has the action "Drop" and meta-data
+ * set to all-zeros.
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @param default_entry
+ *   New contents for the table default entry
+ * @param default_entry_ptr
+ *   On successful invocation, pointer to the default table entry which can be
+ *   used for further read-write accesses to this table entry. This pointer
+ *   is valid until the default entry is deleted or re-added.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_default_entry_add(struct rte_pipeline *p,
+	uint32_t table_id,
+	struct rte_pipeline_table_entry *default_entry,
+	struct rte_pipeline_table_entry **default_entry_ptr);
+
+/**
+ * Pipeline table default entry delete
+ *
+ * The new contents of the table default entry is set to reserved action "Drop
+ * the packet" with meta-data cleared (i.e. set to all-zeros).
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @param entry
+ *   On successful invocation, when entry points to a valid buffer, the
+ *   previous contents of the table default entry (as it was just before the
+ *   delete operation) is copied to this buffer
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_default_entry_delete(struct rte_pipeline *p,
+	uint32_t table_id,
+	struct rte_pipeline_table_entry *entry);
+
+/**
+ * Pipeline table entry add
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @param key
+ *   Table entry key
+ * @param entry
+ *   New contents for the table entry identified by key
+ * @param key_found
+ *   On successful invocation, set to TRUE (value different than 0) if key was
+ *   already present in the table before the add operation and to FALSE (value
+ *   0) if not
+ * @param entry_ptr
+ *   On successful invocation, pointer to the table entry associated with key.
+ *   This can be used for further read-write accesses to this table entry and
+ *   is valid until the key is deleted from the table or re-added (usually for
+ *   associating different actions and/or action meta-data to the current key)
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_entry_add(struct rte_pipeline *p,
+	uint32_t table_id,
+	void *key,
+	struct rte_pipeline_table_entry *entry,
+	int *key_found,
+	struct rte_pipeline_table_entry **entry_ptr);
+
+/**
+ * Pipeline table entry delete
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @param key
+ *   Table entry key
+ * @param key_found
+ *   On successful invocation, set to TRUE (value different than 0) if key was
+ *   found in the table before the delete operation and to FALSE (value 0) if
+ *   not
+ * @param entry
+ *   On successful invocation, when key is found in the table and entry points
+ *   to a valid buffer, the table entry contents (as it was before the delete
+ *   was performed) is copied to this buffer
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_entry_delete(struct rte_pipeline *p,
+	uint32_t table_id,
+	void *key,
+	int *key_found,
+	struct rte_pipeline_table_entry *entry);
+
+/**
+ * Pipeline table entry add bulk
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @param keys
+ *   Array containing table entry keys
+ * @param entries
+ *   Array containung new contents for every table entry identified by key
+ * @param n_keys
+ *   Number of keys to add
+ * @param key_found
+ *   On successful invocation, key_found for every item in the array is set to
+ *   TRUE (value different than 0) if key was already present in the table
+ *   before the add operation and to FALSE (value 0) if not
+ * @param entries_ptr
+ *   On successful invocation, array *entries_ptr stores pointer to every table
+ *   entry associated with key. This can be used for further read-write accesses
+ *   to this table entry and is valid until the key is deleted from the table or
+ *   re-added (usually for associating different actions and/or action meta-data
+ *   to the current key)
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_entry_add_bulk(struct rte_pipeline *p,
+	uint32_t table_id,
+	void **keys,
+	struct rte_pipeline_table_entry **entries,
+	uint32_t n_keys,
+	int *key_found,
+	struct rte_pipeline_table_entry **entries_ptr);
+
+/**
+ * Pipeline table entry delete bulk
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @param keys
+ *   Array containing table entry keys
+ * @param n_keys
+ *   Number of keys to delete
+ * @param key_found
+ *   On successful invocation, key_found for every item in the array is set to
+ *   TRUE (value different than 0) if key was found in the table before the
+ *   delete operation and to FALSE (value 0) if not
+ * @param entries
+ *   If entries pointer is NULL, this pointer is ignored for every entry found.
+ *   Else, after successful invocation, if specific key is found in the table
+ *   and entry points to a valid buffer, the table entry contents (as it was
+ *   before the delete was performed) is copied to this buffer.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_entry_delete_bulk(struct rte_pipeline *p,
+	uint32_t table_id,
+	void **keys,
+	uint32_t n_keys,
+	int *key_found,
+	struct rte_pipeline_table_entry **entries);
+
+/**
+ * Read pipeline table stats.
+ *
+ * This function reads table statistics identified by *table_id* of given
+ * pipeline *p*.
+ *
+ * @param p
+ *   Handle to pipeline instance.
+ * @param table_id
+ *   Port ID what stats will be returned.
+ * @param stats
+ *   Statistics buffer.
+ * @param clear
+ *   If not 0 clear stats after reading.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id,
+	struct rte_pipeline_table_stats *stats, int clear);
+
+/*
+ * Port IN
+ *
+ */
+/** Maximum number of input ports allowed for any given pipeline instance. The
+	value of this parameter cannot be changed. */
+#define RTE_PIPELINE_PORT_IN_MAX                                    64
+
+/**
+ * Pipeline input port action handler
+ *
+ * The action handler can decide to drop packets by resetting the associated
+ * packet bit in the pkts_mask parameter. In this case, the action handler is
+ * required not to free the packet buffer, which will be freed eventually by
+ * the pipeline.
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param n
+ *   Number of packets in the input burst. This parameter specifies that
+ *   elements 0 to (n-1) of pkts array are valid.
+ * @param arg
+ *   Opaque parameter registered by the user at the pipeline table creation
+ *   time
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_pipeline_port_in_action_handler)(
+	struct rte_pipeline *p,
+	struct rte_mbuf **pkts,
+	uint32_t n,
+	void *arg);
+
+/** Parameters for pipeline input port creation */
+struct rte_pipeline_port_in_params {
+	/** Input port operations (specific to each table type) */
+	struct rte_port_in_ops *ops;
+	/** Opaque parameter to be passed to create operation when invoked */
+	void *arg_create;
+
+	/** Callback function to execute the user actions on input packets.
+		Disabled if set to NULL. */
+	rte_pipeline_port_in_action_handler f_action;
+	/** Opaque parameter to be passed to the action handler when invoked */
+	void *arg_ah;
+
+	/** Recommended burst size for the RX operation(in number of pkts) */
+	uint32_t burst_size;
+};
+
+/**
+ * Pipeline input port create
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param params
+ *   Parameters for pipeline input port creation
+ * @param port_id
+ *   Input port ID. Valid only within the scope of input port IDs of the
+ *   current pipeline. Only returned after a successful invocation.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_in_create(struct rte_pipeline *p,
+	struct rte_pipeline_port_in_params *params,
+	uint32_t *port_id);
+
+/**
+ * Pipeline input port connect to table
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param port_id
+ *   Port ID (returned by previous invocation of pipeline input port create)
+ * @param table_id
+ *   Table ID (returned by previous invocation of pipeline table create)
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_in_connect_to_table(struct rte_pipeline *p,
+	uint32_t port_id,
+	uint32_t table_id);
+
+/**
+ * Pipeline input port enable
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param port_id
+ *   Port ID (returned by previous invocation of pipeline input port create)
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_in_enable(struct rte_pipeline *p,
+	uint32_t port_id);
+
+/**
+ * Pipeline input port disable
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param port_id
+ *   Port ID (returned by previous invocation of pipeline input port create)
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_in_disable(struct rte_pipeline *p,
+	uint32_t port_id);
+
+/**
+ * Read pipeline port in stats.
+ *
+ * This function reads port in statistics identified by *port_id* of given
+ * pipeline *p*.
+ *
+ * @param p
+ *   Handle to pipeline instance.
+ * @param port_id
+ *   Port ID what stats will be returned.
+ * @param stats
+ *   Statistics buffer.
+ * @param clear
+ *   If not 0 clear stats after reading.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_in_stats *stats, int clear);
+
+/*
+ * Port OUT
+ *
+ */
+/** Maximum number of output ports allowed for any given pipeline instance. The
+	value of this parameter cannot be changed. */
+#define RTE_PIPELINE_PORT_OUT_MAX                                   64
+
+/**
+ * Pipeline output port action handler
+ *
+ * The action handler can decide to drop packets by resetting the associated
+ * packet bit in the pkts_mask parameter. In this case, the action handler is
+ * required not to free the packet buffer, which will be freed eventually by
+ * the pipeline.
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param pkts_mask
+ *   64-bit bitmask specifying which packets in the input burst are valid. When
+ *   pkts_mask bit n is set, then element n of pkts array is pointing to a
+ *   valid packet. Otherwise, element n of pkts array will not be accessed.
+ * @param arg
+ *   Opaque parameter registered by the user at the pipeline table creation
+ *   time
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_pipeline_port_out_action_handler)(
+	struct rte_pipeline *p,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	void *arg);
+
+/** Parameters for pipeline output port creation. The action handlers have to
+be either both enabled or both disabled (by setting them to NULL). When
+enabled, the pipeline selects between them at different moments, based on the
+number of packets that have to be sent to the same output port. */
+struct rte_pipeline_port_out_params {
+	/** Output port operations (specific to each table type) */
+	struct rte_port_out_ops *ops;
+	/** Opaque parameter to be passed to create operation when invoked */
+	void *arg_create;
+
+	/** Callback function executing the user actions on bust of input
+	packets */
+	rte_pipeline_port_out_action_handler f_action;
+	/** Opaque parameter to be passed to the action handler when invoked */
+	void *arg_ah;
+};
+
+/**
+ * Pipeline output port create
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param params
+ *   Parameters for pipeline output port creation
+ * @param port_id
+ *   Output port ID. Valid only within the scope of output port IDs of the
+ *   current pipeline. Only returned after a successful invocation.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_out_create(struct rte_pipeline *p,
+	struct rte_pipeline_port_out_params *params,
+	uint32_t *port_id);
+
+/**
+ * Read pipeline port out stats.
+ *
+ * This function reads port out statistics identified by *port_id* of given
+ * pipeline *p*.
+ *
+ * @param p
+ *   Handle to pipeline instance.
+ * @param port_id
+ *   Port ID what stats will be returned.
+ * @param stats
+ *   Statistics buffer.
+ * @param clear
+ *   If not 0 clear stats after reading.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id,
+	struct rte_pipeline_port_out_stats *stats, int clear);
+
+/*
+ * Functions to be called as part of the port IN/OUT or table action handlers
+ *
+ */
+/**
+ * Action handler packet insert to output port
+ *
+ * This function can be called by any input/output port or table action handler
+ * to send a packet out through one of the pipeline output ports. This packet is
+ * generated by the action handler, i.e. this packet is not part of the burst of
+ * packets read from one of the pipeline input ports and currently processed by
+ * the pipeline (this packet is not an element of the pkts array input parameter
+ * of the action handler).
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param port_id
+ *   Output port ID (returned by previous invocation of pipeline output port
+ *   create) to send the packet specified by pkt
+ * @param pkt
+ *   New packet generated by the action handler
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_port_out_packet_insert(struct rte_pipeline *p,
+	uint32_t port_id,
+	struct rte_mbuf *pkt);
+
+#define rte_pipeline_ah_port_out_packet_insert \
+	rte_pipeline_port_out_packet_insert
+
+/**
+ * Action handler packet hijack
+ *
+ * This function can be called by any input/output port or table action handler
+ * to hijack selected packets from the burst of packets read from one of the
+ * pipeline input ports and currently processed by the pipeline. The hijacked
+ * packets are removed from any further pipeline processing, with the action
+ * handler now having the full ownership for these packets.
+ *
+ * The action handler can further send the hijacked packets out through any
+ * pipeline output port by calling the rte_pipeline_ah_port_out_packet_insert()
+ * function. The action handler can also drop these packets by calling the
+ * rte_pktmbuf_free() function, although a better alternative is provided by
+ * the action handler using the rte_pipeline_ah_packet_drop() function.
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param pkts_mask
+ *   64-bit bitmask specifying which of the packets handed over for processing
+ *   to the action handler is to be hijacked by the action handler. When
+ *   pkts_mask bit n is set, then element n of the pkts array (input argument to
+ *   the action handler) is hijacked.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_ah_packet_hijack(struct rte_pipeline *p,
+	uint64_t pkts_mask);
+
+/**
+ * Action handler packet drop
+ *
+ * This function is called by the pipeline action handlers (port in/out, table)
+ * to drop the packets selected using packet mask.
+ *
+ * This function can be called by any input/output port or table action handler
+ * to drop selected packets from the burst of packets read from one of the
+ * pipeline input ports and currently processed by the pipeline. The dropped
+ * packets are removed from any further pipeline processing and the packet
+ * buffers are eventually freed to their buffer pool.
+ *
+ * This function updates the drop statistics counters correctly, therefore the
+ * recommended approach for dropping packets by the action handlers is to call
+ * this function as opposed to the action handler hijacking the packets first
+ * and then dropping them invisibly to the pipeline (by using the
+ * rte_pktmbuf_free() function).
+ *
+ * @param p
+ *   Handle to pipeline instance
+ * @param pkts_mask
+ *   64-bit bitmask specifying which of the packets handed over for processing
+ *   to the action handler is to be dropped by the action handler. When
+ *   pkts_mask bit n is set, then element n of the pkts array (input argument to
+ *   the action handler) is dropped.
+ * @return
+ *   0 on success, error code otherwise
+ */
+int rte_pipeline_ah_packet_drop(struct rte_pipeline *p,
+	uint64_t pkts_mask);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_pipeline/rte_pipeline_version.map b/lib/librte_pipeline/rte_pipeline_version.map
new file mode 100644
index 00000000..e4ee154f
--- /dev/null
+++ b/lib/librte_pipeline/rte_pipeline_version.map
@@ -0,0 +1,47 @@
+DPDK_2.0 {
+	global:
+
+	rte_pipeline_check;
+	rte_pipeline_create;
+	rte_pipeline_flush;
+	rte_pipeline_free;
+	rte_pipeline_port_in_connect_to_table;
+	rte_pipeline_port_in_create;
+	rte_pipeline_port_in_disable;
+	rte_pipeline_port_in_enable;
+	rte_pipeline_port_out_create;
+	rte_pipeline_port_out_packet_insert;
+	rte_pipeline_run;
+	rte_pipeline_table_create;
+	rte_pipeline_table_default_entry_add;
+	rte_pipeline_table_default_entry_delete;
+	rte_pipeline_table_entry_add;
+	rte_pipeline_table_entry_delete;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_pipeline_port_in_stats_read;
+	rte_pipeline_port_out_stats_read;
+	rte_pipeline_table_stats_read;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+	global:
+
+	rte_pipeline_table_entry_add_bulk;
+	rte_pipeline_table_entry_delete_bulk;
+
+} DPDK_2.1;
+
+DPDK_16.04 {
+	global:
+
+	rte_pipeline_ah_packet_hijack;
+	rte_pipeline_ah_packet_drop;
+
+} DPDK_2.2;
diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile
new file mode 100644
index 00000000..2c0ccbe5
--- /dev/null
+++ b/lib/librte_port/Makefile
@@ -0,0 +1,79 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_port.a
+ifeq ($(CONFIG_RTE_PORT_PCAP),y)
+LDLIBS += -lpcap
+endif
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_port_version.map
+
+LIBABIVER := 2
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ring.c
+ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y)
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c
+endif
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_source_sink.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ethdev.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ring.h
+ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h
+endif
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) := lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ip_frag
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_port/rte_port.h b/lib/librte_port/rte_port.h
new file mode 100644
index 00000000..c3c53487
--- /dev/null
+++ b/lib/librte_port/rte_port.h
@@ -0,0 +1,263 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_H__
+#define __INCLUDE_RTE_PORT_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port
+ *
+ * This tool is part of the DPDK Packet Framework tool suite and provides
+ * a standard interface to implement different types of packet ports.
+ *
+ ***/
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**@{
+ * Macros to allow accessing metadata stored in the mbuf headroom
+ * just beyond the end of the mbuf data structure returned by a port
+ */
+#define RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)          \
+	(&((uint8_t *)(mbuf))[offset])
+#define RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset)         \
+	((uint16_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset))
+#define RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset)         \
+	((uint32_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset))
+#define RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset)         \
+	((uint64_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset))
+
+#define RTE_MBUF_METADATA_UINT8(mbuf, offset)              \
+	(*RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset))
+#define RTE_MBUF_METADATA_UINT16(mbuf, offset)             \
+	(*RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset))
+#define RTE_MBUF_METADATA_UINT32(mbuf, offset)             \
+	(*RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset))
+#define RTE_MBUF_METADATA_UINT64(mbuf, offset)             \
+	(*RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset))
+/**@}*/
+
+/*
+ * Port IN
+ *
+ */
+/** Maximum number of packets read from any input port in a single burst.
+Cannot be changed. */
+#define RTE_PORT_IN_BURST_SIZE_MAX                         64
+
+/** Input port statistics */
+struct rte_port_in_stats {
+	uint64_t n_pkts_in;
+	uint64_t n_pkts_drop;
+};
+
+/**
+ * Input port create
+ *
+ * @param params
+ *   Parameters for input port creation
+ * @param socket_id
+ *   CPU socket ID (e.g. for memory allocation purpose)
+ * @return
+ *   Handle to input port instance
+ */
+typedef void* (*rte_port_in_op_create)(void *params, int socket_id);
+
+/**
+ * Input port free
+ *
+ * @param port
+ *   Handle to input port instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_port_in_op_free)(void *port);
+
+/**
+ * Input port packet burst RX
+ *
+ * @param port
+ *   Handle to input port instance
+ * @param pkts
+ *   Burst of input packets
+ * @param n_pkts
+ *   Number of packets in the input burst
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_port_in_op_rx)(
+	void *port,
+	struct rte_mbuf **pkts,
+	uint32_t n_pkts);
+
+/**
+ * Input port stats get
+ *
+ * @param port
+ *   Handle to output port instance
+ * @param stats
+ *   Handle to port_in stats struct to copy data
+ * @param clear
+ *   Flag indicating that stats should be cleared after read
+ *
+ * @return
+ *   Error code or 0 on success.
+ */
+typedef int (*rte_port_in_op_stats_read)(
+		void *port,
+		struct rte_port_in_stats *stats,
+		int clear);
+
+/** Input port interface defining the input port operation */
+struct rte_port_in_ops {
+	rte_port_in_op_create f_create;      /**< Create */
+	rte_port_in_op_free f_free;          /**< Free */
+	rte_port_in_op_rx f_rx;              /**< Packet RX (packet burst) */
+	rte_port_in_op_stats_read f_stats;   /**< Stats */
+};
+
+/*
+ * Port OUT
+ *
+ */
+/** Output port statistics */
+struct rte_port_out_stats {
+	uint64_t n_pkts_in;
+	uint64_t n_pkts_drop;
+};
+
+/**
+ * Output port create
+ *
+ * @param params
+ *   Parameters for output port creation
+ * @param socket_id
+ *   CPU socket ID (e.g. for memory allocation purpose)
+ * @return
+ *   Handle to output port instance
+ */
+typedef void* (*rte_port_out_op_create)(void *params, int socket_id);
+
+/**
+ * Output port free
+ *
+ * @param port
+ *   Handle to output port instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_port_out_op_free)(void *port);
+
+/**
+ * Output port single packet TX
+ *
+ * @param port
+ *   Handle to output port instance
+ * @param pkt
+ *   Input packet
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_port_out_op_tx)(
+	void *port,
+	struct rte_mbuf *pkt);
+
+/**
+ * Output port packet burst TX
+ *
+ * @param port
+ *   Handle to output port instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param pkts_mask
+ *   64-bit bitmask specifying which packets in the input burst are valid. When
+ *   pkts_mask bit n is set, then element n of pkts array is pointing to a
+ *   valid packet. Otherwise, element n of pkts array will not be accessed.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_port_out_op_tx_bulk)(
+	void *port,
+	struct rte_mbuf **pkt,
+	uint64_t pkts_mask);
+
+/**
+ * Output port flush
+ *
+ * @param port
+ *   Handle to output port instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_port_out_op_flush)(void *port);
+
+/**
+ * Output port stats read
+ *
+ * @param port
+ *   Handle to output port instance
+ * @param stats
+ *   Handle to port_out stats struct to copy data
+ * @param clear
+ *   Flag indicating that stats should be cleared after read
+ *
+ * @return
+ *   Error code or 0 on success.
+ */
+typedef int (*rte_port_out_op_stats_read)(
+		void *port,
+		struct rte_port_out_stats *stats,
+		int clear);
+
+/** Output port interface defining the output port operation */
+struct rte_port_out_ops {
+	rte_port_out_op_create f_create;      /**< Create */
+	rte_port_out_op_free f_free;          /**< Free */
+	rte_port_out_op_tx f_tx;              /**< Packet TX (single packet) */
+	rte_port_out_op_tx_bulk f_tx_bulk;    /**< Packet TX (packet burst) */
+	rte_port_out_op_flush f_flush;        /**< Flush */
+	rte_port_out_op_stats_read f_stats;   /**< Stats */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_ethdev.c b/lib/librte_port/rte_port_ethdev.c
new file mode 100644
index 00000000..73e5f185
--- /dev/null
+++ b/lib/librte_port/rte_port_ethdev.c
@@ -0,0 +1,553 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "rte_port_ethdev.h"
+
+/*
+ * Port ETHDEV Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_ETHDEV_READER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_ETHDEV_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ethdev_reader {
+	struct rte_port_in_stats stats;
+
+	uint16_t queue_id;
+	uint8_t port_id;
+};
+
+static void *
+rte_port_ethdev_reader_create(void *params, int socket_id)
+{
+	struct rte_port_ethdev_reader_params *conf =
+			(struct rte_port_ethdev_reader_params *) params;
+	struct rte_port_ethdev_reader *port;
+
+	/* Check input parameters */
+	if (conf == NULL) {
+		RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->port_id = conf->port_id;
+	port->queue_id = conf->queue_id;
+
+	return port;
+}
+
+static int
+rte_port_ethdev_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_ethdev_reader *p =
+		(struct rte_port_ethdev_reader *) port;
+	uint16_t rx_pkt_cnt;
+
+	rx_pkt_cnt = rte_eth_rx_burst(p->port_id, p->queue_id, pkts, n_pkts);
+	RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(p, rx_pkt_cnt);
+	return rx_pkt_cnt;
+}
+
+static int
+rte_port_ethdev_reader_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_ethdev_reader_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_ethdev_reader *p =
+			(struct rte_port_ethdev_reader *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port ETHDEV Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ethdev_writer {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz;
+	uint16_t tx_buf_count;
+	uint64_t bsz_mask;
+	uint16_t queue_id;
+	uint8_t port_id;
+};
+
+static void *
+rte_port_ethdev_writer_create(void *params, int socket_id)
+{
+	struct rte_port_ethdev_writer_params *conf =
+			(struct rte_port_ethdev_writer_params *) params;
+	struct rte_port_ethdev_writer *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->tx_burst_sz == 0) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->port_id = conf->port_id;
+	port->queue_id = conf->queue_id;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+	return port;
+}
+
+static inline void
+send_burst(struct rte_port_ethdev_writer *p)
+{
+	uint32_t nb_tx;
+
+	nb_tx = rte_eth_tx_burst(p->port_id, p->queue_id,
+			 p->tx_buf, p->tx_buf_count);
+
+	RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_ethdev_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ethdev_writer *p =
+		(struct rte_port_ethdev_writer *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_ethdev_writer_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	struct rte_port_ethdev_writer *p =
+		(struct rte_port_ethdev_writer *) port;
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+			((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t n_pkts_ok;
+
+		if (tx_buf_count)
+			send_burst(p);
+
+		RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
+		n_pkts_ok = rte_eth_tx_burst(p->port_id, p->queue_id, pkts,
+			n_pkts);
+
+		RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok);
+		for ( ; n_pkts_ok < n_pkts; n_pkts_ok++) {
+			struct rte_mbuf *pkt = pkts[n_pkts_ok];
+
+			rte_pktmbuf_free(pkt);
+		}
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+		p->tx_buf_count = tx_buf_count;
+		if (tx_buf_count >= p->tx_burst_sz)
+			send_burst(p);
+	}
+
+	return 0;
+}
+
+static int
+rte_port_ethdev_writer_flush(void *port)
+{
+	struct rte_port_ethdev_writer *p =
+		(struct rte_port_ethdev_writer *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_ethdev_writer_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_port_ethdev_writer_flush(port);
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_ethdev_writer_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_ethdev_writer *p =
+		(struct rte_port_ethdev_writer *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port ETHDEV Writer Nodrop
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ethdev_writer_nodrop {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz;
+	uint16_t tx_buf_count;
+	uint64_t bsz_mask;
+	uint64_t n_retries;
+	uint16_t queue_id;
+	uint8_t port_id;
+};
+
+static void *
+rte_port_ethdev_writer_nodrop_create(void *params, int socket_id)
+{
+	struct rte_port_ethdev_writer_nodrop_params *conf =
+			(struct rte_port_ethdev_writer_nodrop_params *) params;
+	struct rte_port_ethdev_writer_nodrop *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->tx_burst_sz == 0) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->port_id = conf->port_id;
+	port->queue_id = conf->queue_id;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+	/*
+	 * When n_retries is 0 it means that we should wait for every packet to
+	 * send no matter how many retries should it take. To limit number of
+	 * branches in fast path, we use UINT64_MAX instead of branching.
+	 */
+	port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries;
+
+	return port;
+}
+
+static inline void
+send_burst_nodrop(struct rte_port_ethdev_writer_nodrop *p)
+{
+	uint32_t nb_tx = 0, i;
+
+	nb_tx = rte_eth_tx_burst(p->port_id, p->queue_id, p->tx_buf,
+			p->tx_buf_count);
+
+	/* We sent all the packets in a first try */
+	if (nb_tx >= p->tx_buf_count) {
+		p->tx_buf_count = 0;
+		return;
+	}
+
+	for (i = 0; i < p->n_retries; i++) {
+		nb_tx += rte_eth_tx_burst(p->port_id, p->queue_id,
+							 p->tx_buf + nb_tx, p->tx_buf_count - nb_tx);
+
+		/* We sent all the packets in more than one try */
+		if (nb_tx >= p->tx_buf_count) {
+			p->tx_buf_count = 0;
+			return;
+		}
+	}
+
+	/* We didn't send the packets in maximum allowed attempts */
+	RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_ethdev_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ethdev_writer_nodrop *p =
+		(struct rte_port_ethdev_writer_nodrop *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_ethdev_writer_nodrop_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	struct rte_port_ethdev_writer_nodrop *p =
+		(struct rte_port_ethdev_writer_nodrop *) port;
+
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+			((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t n_pkts_ok;
+
+		if (tx_buf_count)
+			send_burst_nodrop(p);
+
+		RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
+		n_pkts_ok = rte_eth_tx_burst(p->port_id, p->queue_id, pkts,
+			n_pkts);
+
+		if (n_pkts_ok >= n_pkts)
+			return 0;
+
+		/*
+		 * If we didnt manage to send all packets in single burst, move
+		 * remaining packets to the buffer and call send burst.
+		 */
+		for (; n_pkts_ok < n_pkts; n_pkts_ok++) {
+			struct rte_mbuf *pkt = pkts[n_pkts_ok];
+			p->tx_buf[p->tx_buf_count++] = pkt;
+		}
+		send_burst_nodrop(p);
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+		p->tx_buf_count = tx_buf_count;
+		if (tx_buf_count >= p->tx_burst_sz)
+			send_burst_nodrop(p);
+	}
+
+	return 0;
+}
+
+static int
+rte_port_ethdev_writer_nodrop_flush(void *port)
+{
+	struct rte_port_ethdev_writer_nodrop *p =
+		(struct rte_port_ethdev_writer_nodrop *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_ethdev_writer_nodrop_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_port_ethdev_writer_nodrop_flush(port);
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_ethdev_writer_nodrop_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_ethdev_writer_nodrop *p =
+		(struct rte_port_ethdev_writer_nodrop *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_ethdev_reader_ops = {
+	.f_create = rte_port_ethdev_reader_create,
+	.f_free = rte_port_ethdev_reader_free,
+	.f_rx = rte_port_ethdev_reader_rx,
+	.f_stats = rte_port_ethdev_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ethdev_writer_ops = {
+	.f_create = rte_port_ethdev_writer_create,
+	.f_free = rte_port_ethdev_writer_free,
+	.f_tx = rte_port_ethdev_writer_tx,
+	.f_tx_bulk = rte_port_ethdev_writer_tx_bulk,
+	.f_flush = rte_port_ethdev_writer_flush,
+	.f_stats = rte_port_ethdev_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops = {
+	.f_create = rte_port_ethdev_writer_nodrop_create,
+	.f_free = rte_port_ethdev_writer_nodrop_free,
+	.f_tx = rte_port_ethdev_writer_nodrop_tx,
+	.f_tx_bulk = rte_port_ethdev_writer_nodrop_tx_bulk,
+	.f_flush = rte_port_ethdev_writer_nodrop_flush,
+	.f_stats = rte_port_ethdev_writer_nodrop_stats_read,
+};
diff --git a/lib/librte_port/rte_port_ethdev.h b/lib/librte_port/rte_port_ethdev.h
new file mode 100644
index 00000000..201a79e4
--- /dev/null
+++ b/lib/librte_port/rte_port_ethdev.h
@@ -0,0 +1,105 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_ETHDEV_H__
+#define __INCLUDE_RTE_PORT_ETHDEV_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port Ethernet Device
+ *
+ * ethdev_reader: input port built on top of pre-initialized NIC RX queue
+ * ethdev_writer: output port built on top of pre-initialized NIC TX queue
+ *
+ ***/
+
+#include <stdint.h>
+
+#include "rte_port.h"
+
+/** ethdev_reader port parameters */
+struct rte_port_ethdev_reader_params {
+	/** NIC RX port ID */
+	uint8_t port_id;
+
+	/** NIC RX queue ID */
+	uint16_t queue_id;
+};
+
+/** ethdev_reader port operations */
+extern struct rte_port_in_ops rte_port_ethdev_reader_ops;
+
+/** ethdev_writer port parameters */
+struct rte_port_ethdev_writer_params {
+	/** NIC RX port ID */
+	uint8_t port_id;
+
+	/** NIC RX queue ID */
+	uint16_t queue_id;
+
+	/** Recommended burst size to NIC TX queue. The actual burst size can be
+	bigger or smaller than this value. */
+	uint32_t tx_burst_sz;
+};
+
+/** ethdev_writer port operations */
+extern struct rte_port_out_ops rte_port_ethdev_writer_ops;
+
+/** ethdev_writer_nodrop port parameters */
+struct rte_port_ethdev_writer_nodrop_params {
+	/** NIC RX port ID */
+	uint8_t port_id;
+
+	/** NIC RX queue ID */
+	uint16_t queue_id;
+
+	/** Recommended burst size to NIC TX queue. The actual burst size can be
+	bigger or smaller than this value. */
+	uint32_t tx_burst_sz;
+
+	/** Maximum number of retries, 0 for no limit */
+	uint32_t n_retries;
+};
+
+/** ethdev_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_frag.c b/lib/librte_port/rte_port_frag.c
new file mode 100644
index 00000000..0fcace99
--- /dev/null
+++ b/lib/librte_port/rte_port_frag.c
@@ -0,0 +1,305 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#include <rte_ether.h>
+#include <rte_ip_frag.h>
+#include <rte_memory.h>
+
+#include "rte_port_frag.h"
+
+/* Max number of fragments per packet allowed */
+#define	RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET 0x80
+
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+typedef int32_t
+		(*frag_op)(struct rte_mbuf *pkt_in,
+			struct rte_mbuf **pkts_out,
+			uint16_t nb_pkts_out,
+			uint16_t mtu_size,
+			struct rte_mempool *pool_direct,
+			struct rte_mempool *pool_indirect);
+
+struct rte_port_ring_reader_frag {
+	struct rte_port_in_stats stats;
+
+	/* Input parameters */
+	struct rte_ring *ring;
+	uint32_t mtu;
+	uint32_t metadata_size;
+	struct rte_mempool *pool_direct;
+	struct rte_mempool *pool_indirect;
+
+	/* Internal buffers */
+	struct rte_mbuf *pkts[RTE_PORT_IN_BURST_SIZE_MAX];
+	struct rte_mbuf *frags[RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET];
+	uint32_t n_pkts;
+	uint32_t pos_pkts;
+	uint32_t n_frags;
+	uint32_t pos_frags;
+
+	frag_op f_frag;
+} __rte_cache_aligned;
+
+static void *
+rte_port_ring_reader_frag_create(void *params, int socket_id, int is_ipv4)
+{
+	struct rte_port_ring_reader_frag_params *conf =
+			(struct rte_port_ring_reader_frag_params *) params;
+	struct rte_port_ring_reader_frag *port;
+
+	/* Check input parameters */
+	if (conf == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__);
+		return NULL;
+	}
+	if (conf->ring == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__);
+		return NULL;
+	}
+	if (conf->mtu == 0) {
+		RTE_LOG(ERR, PORT, "%s: Parameter mtu is invalid\n", __func__);
+		return NULL;
+	}
+	if (conf->pool_direct == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter pool_direct is NULL\n",
+			__func__);
+		return NULL;
+	}
+	if (conf->pool_indirect == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter pool_indirect is NULL\n",
+			__func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port), RTE_CACHE_LINE_SIZE,
+		socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->ring = conf->ring;
+	port->mtu = conf->mtu;
+	port->metadata_size = conf->metadata_size;
+	port->pool_direct = conf->pool_direct;
+	port->pool_indirect = conf->pool_indirect;
+
+	port->n_pkts = 0;
+	port->pos_pkts = 0;
+	port->n_frags = 0;
+	port->pos_frags = 0;
+
+	port->f_frag = (is_ipv4) ?
+			rte_ipv4_fragment_packet : rte_ipv6_fragment_packet;
+
+	return port;
+}
+
+static void *
+rte_port_ring_reader_ipv4_frag_create(void *params, int socket_id)
+{
+	return rte_port_ring_reader_frag_create(params, socket_id, 1);
+}
+
+static void *
+rte_port_ring_reader_ipv6_frag_create(void *params, int socket_id)
+{
+	return rte_port_ring_reader_frag_create(params, socket_id, 0);
+}
+
+static int
+rte_port_ring_reader_frag_rx(void *port,
+		struct rte_mbuf **pkts,
+		uint32_t n_pkts)
+{
+	struct rte_port_ring_reader_frag *p =
+			(struct rte_port_ring_reader_frag *) port;
+	uint32_t n_pkts_out;
+
+	n_pkts_out = 0;
+
+	/* Get packets from the "frag" buffer */
+	if (p->n_frags >= n_pkts) {
+		memcpy(pkts, &p->frags[p->pos_frags], n_pkts * sizeof(void *));
+		p->pos_frags += n_pkts;
+		p->n_frags -= n_pkts;
+
+		return n_pkts;
+	}
+
+	memcpy(pkts, &p->frags[p->pos_frags], p->n_frags * sizeof(void *));
+	n_pkts_out = p->n_frags;
+	p->n_frags = 0;
+
+	/* Look to "pkts" buffer to get more packets */
+	for ( ; ; ) {
+		struct rte_mbuf *pkt;
+		uint32_t n_pkts_to_provide, i;
+		int status;
+
+		/* If "pkts" buffer is empty, read packet burst from ring */
+		if (p->n_pkts == 0) {
+			p->n_pkts = rte_ring_sc_dequeue_burst(p->ring,
+				(void **) p->pkts, RTE_PORT_IN_BURST_SIZE_MAX);
+			RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(p, p->n_pkts);
+			if (p->n_pkts == 0)
+				return n_pkts_out;
+			p->pos_pkts = 0;
+		}
+
+		/* Read next packet from "pkts" buffer */
+		pkt = p->pkts[p->pos_pkts++];
+		p->n_pkts--;
+
+		/* If not jumbo, pass current packet to output */
+		if (pkt->pkt_len <= p->mtu) {
+			pkts[n_pkts_out++] = pkt;
+
+			n_pkts_to_provide = n_pkts - n_pkts_out;
+			if (n_pkts_to_provide == 0)
+				return n_pkts;
+
+			continue;
+		}
+
+		/* Fragment current packet into the "frags" buffer */
+		status = p->f_frag(
+			pkt,
+			p->frags,
+			RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET,
+			p->mtu,
+			p->pool_direct,
+			p->pool_indirect
+		);
+
+		if (status < 0) {
+			rte_pktmbuf_free(pkt);
+			RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(p, 1);
+			continue;
+		}
+
+		p->n_frags = (uint32_t) status;
+		p->pos_frags = 0;
+
+		/* Copy meta-data from input jumbo packet to its fragments */
+		for (i = 0; i < p->n_frags; i++) {
+			uint8_t *src =
+			  RTE_MBUF_METADATA_UINT8_PTR(pkt, sizeof(struct rte_mbuf));
+			uint8_t *dst =
+			  RTE_MBUF_METADATA_UINT8_PTR(p->frags[i], sizeof(struct rte_mbuf));
+
+			memcpy(dst, src, p->metadata_size);
+		}
+
+		/* Free input jumbo packet */
+		rte_pktmbuf_free(pkt);
+
+		/* Get packets from "frag" buffer */
+		n_pkts_to_provide = n_pkts - n_pkts_out;
+		if (p->n_frags >= n_pkts_to_provide) {
+			memcpy(&pkts[n_pkts_out], p->frags,
+				n_pkts_to_provide * sizeof(void *));
+			p->n_frags -= n_pkts_to_provide;
+			p->pos_frags += n_pkts_to_provide;
+
+			return n_pkts;
+		}
+
+		memcpy(&pkts[n_pkts_out], p->frags,
+			p->n_frags * sizeof(void *));
+		n_pkts_out += p->n_frags;
+		p->n_frags = 0;
+	}
+}
+
+static int
+rte_port_ring_reader_frag_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__);
+		return -1;
+	}
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_frag_reader_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_ring_reader_frag *p =
+		(struct rte_port_ring_reader_frag *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops = {
+	.f_create = rte_port_ring_reader_ipv4_frag_create,
+	.f_free = rte_port_ring_reader_frag_free,
+	.f_rx = rte_port_ring_reader_frag_rx,
+	.f_stats = rte_port_frag_reader_stats_read,
+};
+
+struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops = {
+	.f_create = rte_port_ring_reader_ipv6_frag_create,
+	.f_free = rte_port_ring_reader_frag_free,
+	.f_rx = rte_port_ring_reader_frag_rx,
+	.f_stats = rte_port_frag_reader_stats_read,
+};
diff --git a/lib/librte_port/rte_port_frag.h b/lib/librte_port/rte_port_frag.h
new file mode 100644
index 00000000..0085ff7c
--- /dev/null
+++ b/lib/librte_port/rte_port_frag.h
@@ -0,0 +1,101 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__
+#define __INCLUDE_RTE_PORT_IP_FRAG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port for IPv4 Fragmentation
+ *
+ * This port is built on top of pre-initialized single consumer rte_ring. In
+ * order to minimize the amount of packets stored in the ring at any given
+ * time, the IP fragmentation functionality is executed on ring read operation,
+ * hence this port is implemented as an input port. A regular ring_writer port
+ * can be created to write to the same ring.
+ *
+ * The packets written to the ring are either complete IP datagrams or jumbo
+ * frames (i.e. IP packets with length bigger than provided MTU value). The
+ * packets read from the ring are all non-jumbo frames. The complete IP
+ * datagrams written to the ring are not changed. The jumbo frames are
+ * fragmented into several IP packets with length less or equal to MTU.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_ring.h>
+
+#include "rte_port.h"
+
+/** ring_reader_ipv4_frag port parameters */
+struct rte_port_ring_reader_frag_params {
+	/** Underlying single consumer ring that has to be pre-initialized. */
+	struct rte_ring *ring;
+
+	/** Maximum Transfer Unit (MTU). Maximum IP packet size (in bytes). */
+	uint32_t mtu;
+
+	/** Size of application dependent meta-data stored per each input packet
+	    that has to be copied to each of the fragments originating from the
+	    same input IP datagram. */
+	uint32_t metadata_size;
+
+	/** Pre-initialized buffer pool used for allocating direct buffers for
+	    the output fragments. */
+	struct rte_mempool *pool_direct;
+
+	/** Pre-initialized buffer pool used for allocating indirect buffers for
+	    the output fragments. */
+	struct rte_mempool *pool_indirect;
+};
+
+#define rte_port_ring_reader_ipv4_frag_params rte_port_ring_reader_frag_params
+
+#define rte_port_ring_reader_ipv6_frag_params rte_port_ring_reader_frag_params
+
+/** ring_reader_ipv4_frag port operations */
+extern struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops;
+
+/** ring_reader_ipv6_frag port operations */
+extern struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c
new file mode 100644
index 00000000..c4bb5081
--- /dev/null
+++ b/lib/librte_port/rte_port_ras.c
@@ -0,0 +1,359 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#include <rte_ether.h>
+#include <rte_ip_frag.h>
+#include <rte_cycles.h>
+#include <rte_log.h>
+
+#include "rte_port_ras.h"
+
+#ifndef RTE_PORT_RAS_N_BUCKETS
+#define RTE_PORT_RAS_N_BUCKETS                                 4094
+#endif
+
+#ifndef RTE_PORT_RAS_N_ENTRIES_PER_BUCKET
+#define RTE_PORT_RAS_N_ENTRIES_PER_BUCKET                      8
+#endif
+
+#ifndef RTE_PORT_RAS_N_ENTRIES
+#define RTE_PORT_RAS_N_ENTRIES (RTE_PORT_RAS_N_BUCKETS * RTE_PORT_RAS_N_ENTRIES_PER_BUCKET)
+#endif
+
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ring_writer_ras;
+
+typedef void (*ras_op)(
+		struct rte_port_ring_writer_ras *p,
+		struct rte_mbuf *pkt);
+
+static void
+process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt);
+static void
+process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt);
+
+struct rte_port_ring_writer_ras {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[RTE_PORT_IN_BURST_SIZE_MAX];
+	struct rte_ring *ring;
+	uint32_t tx_burst_sz;
+	uint32_t tx_buf_count;
+	struct rte_ip_frag_tbl *frag_tbl;
+	struct rte_ip_frag_death_row death_row;
+
+	ras_op f_ras;
+};
+
+static void *
+rte_port_ring_writer_ras_create(void *params, int socket_id, int is_ipv4)
+{
+	struct rte_port_ring_writer_ras_params *conf =
+			(struct rte_port_ring_writer_ras_params *) params;
+	struct rte_port_ring_writer_ras *port;
+	uint64_t frag_cycles;
+
+	/* Check input parameters */
+	if (conf == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__);
+		return NULL;
+	}
+	if (conf->ring == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__);
+		return NULL;
+	}
+	if ((conf->tx_burst_sz == 0) ||
+	    (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) {
+		RTE_LOG(ERR, PORT, "%s: Parameter tx_burst_sz is invalid\n",
+			__func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate socket\n", __func__);
+		return NULL;
+	}
+
+	/* Create fragmentation table */
+	frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * MS_PER_S;
+	frag_cycles *= 100;
+
+	port->frag_tbl = rte_ip_frag_table_create(
+		RTE_PORT_RAS_N_BUCKETS,
+		RTE_PORT_RAS_N_ENTRIES_PER_BUCKET,
+		RTE_PORT_RAS_N_ENTRIES,
+		frag_cycles,
+		socket_id);
+
+	if (port->frag_tbl == NULL) {
+		RTE_LOG(ERR, PORT, "%s: rte_ip_frag_table_create failed\n",
+			__func__);
+		rte_free(port);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->ring = conf->ring;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+
+	port->f_ras = (is_ipv4 == 1) ? process_ipv4 : process_ipv6;
+
+	return port;
+}
+
+static void *
+rte_port_ring_writer_ipv4_ras_create(void *params, int socket_id)
+{
+	return rte_port_ring_writer_ras_create(params, socket_id, 1);
+}
+
+static void *
+rte_port_ring_writer_ipv6_ras_create(void *params, int socket_id)
+{
+	return rte_port_ring_writer_ras_create(params, socket_id, 0);
+}
+
+static inline void
+send_burst(struct rte_port_ring_writer_ras *p)
+{
+	uint32_t nb_tx;
+
+	nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf,
+			p->tx_buf_count);
+
+	RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static void
+process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
+{
+	/* Assume there is no ethernet header */
+	struct ipv4_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv4_hdr *);
+
+	/* Get "More fragments" flag and fragment offset */
+	uint16_t frag_field = rte_be_to_cpu_16(pkt_hdr->fragment_offset);
+	uint16_t frag_offset = (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK);
+	uint16_t frag_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG);
+
+	/* If it is a fragmented packet, then try to reassemble */
+	if ((frag_flag == 0) && (frag_offset == 0))
+		p->tx_buf[p->tx_buf_count++] = pkt;
+	else {
+		struct rte_mbuf *mo;
+		struct rte_ip_frag_tbl *tbl = p->frag_tbl;
+		struct rte_ip_frag_death_row *dr = &p->death_row;
+
+		pkt->l3_len = sizeof(*pkt_hdr);
+
+		/* Process this fragment */
+		mo = rte_ipv4_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(),
+				pkt_hdr);
+		if (mo != NULL)
+			p->tx_buf[p->tx_buf_count++] = mo;
+
+		rte_ip_frag_free_death_row(&p->death_row, 3);
+	}
+}
+
+static void
+process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
+{
+	/* Assume there is no ethernet header */
+	struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *);
+
+	struct ipv6_extension_fragment *frag_hdr;
+	uint16_t frag_data = 0;
+	frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
+	if (frag_hdr != NULL)
+		frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);
+
+	/* If it is a fragmented packet, then try to reassemble */
+	if ((frag_data & RTE_IPV6_FRAG_USED_MASK) == 0)
+		p->tx_buf[p->tx_buf_count++] = pkt;
+	else {
+		struct rte_mbuf *mo;
+		struct rte_ip_frag_tbl *tbl = p->frag_tbl;
+		struct rte_ip_frag_death_row *dr = &p->death_row;
+
+		pkt->l3_len = sizeof(*pkt_hdr) + sizeof(*frag_hdr);
+
+		/* Process this fragment */
+		mo = rte_ipv6_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), pkt_hdr,
+				frag_hdr);
+		if (mo != NULL)
+			p->tx_buf[p->tx_buf_count++] = mo;
+
+		rte_ip_frag_free_death_row(&p->death_row, 3);
+	}
+}
+
+static int
+rte_port_ring_writer_ras_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ring_writer_ras *p =
+			(struct rte_port_ring_writer_ras *) port;
+
+	RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1);
+	p->f_ras(p, pkt);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_ras_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	struct rte_port_ring_writer_ras *p =
+			(struct rte_port_ring_writer_ras *) port;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++) {
+			struct rte_mbuf *pkt = pkts[i];
+
+			RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1);
+			p->f_ras(p, pkt);
+			if (p->tx_buf_count >= p->tx_burst_sz)
+				send_burst(p);
+		}
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1);
+			p->f_ras(p, pkt);
+			if (p->tx_buf_count >= p->tx_burst_sz)
+				send_burst(p);
+
+			pkts_mask &= ~pkt_mask;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_ras_flush(void *port)
+{
+	struct rte_port_ring_writer_ras *p =
+			(struct rte_port_ring_writer_ras *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_ras_free(void *port)
+{
+	struct rte_port_ring_writer_ras *p =
+			(struct rte_port_ring_writer_ras *) port;
+
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__);
+		return -1;
+	}
+
+	rte_port_ring_writer_ras_flush(port);
+	rte_ip_frag_table_destroy(p->frag_tbl);
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_ras_writer_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_ring_writer_ras *p =
+		(struct rte_port_ring_writer_ras *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops = {
+	.f_create = rte_port_ring_writer_ipv4_ras_create,
+	.f_free = rte_port_ring_writer_ras_free,
+	.f_tx = rte_port_ring_writer_ras_tx,
+	.f_tx_bulk = rte_port_ring_writer_ras_tx_bulk,
+	.f_flush = rte_port_ring_writer_ras_flush,
+	.f_stats = rte_port_ras_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops = {
+	.f_create = rte_port_ring_writer_ipv6_ras_create,
+	.f_free = rte_port_ring_writer_ras_free,
+	.f_tx = rte_port_ring_writer_ras_tx,
+	.f_tx_bulk = rte_port_ring_writer_ras_tx_bulk,
+	.f_flush = rte_port_ring_writer_ras_flush,
+	.f_stats = rte_port_ras_writer_stats_read,
+};
diff --git a/lib/librte_port/rte_port_ras.h b/lib/librte_port/rte_port_ras.h
new file mode 100644
index 00000000..5a16f831
--- /dev/null
+++ b/lib/librte_port/rte_port_ras.h
@@ -0,0 +1,90 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_RAS_H__
+#define __INCLUDE_RTE_PORT_RAS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port for IPv4 Reassembly
+ *
+ * This port is built on top of pre-initialized single producer rte_ring. In
+ * order to minimize the amount of packets stored in the ring at any given
+ * time, the IP reassembly functionality is executed on ring write operation,
+ * hence this port is implemented as an output port. A regular ring_reader port
+ * can be created to read from the same ring.
+ *
+ * The packets written to the ring are either complete IP datagrams or IP
+ * fragments. The packets read from the ring are all complete IP datagrams,
+ * either jumbo frames (i.e. IP packets with length bigger than MTU) or not.
+ * The complete IP datagrams written to the ring are not changed. The IP
+ * fragments written to the ring are first reassembled and into complete IP
+ * datagrams or dropped on error or IP reassembly time-out.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_ring.h>
+
+#include "rte_port.h"
+
+/** ring_writer_ipv4_ras port parameters */
+struct rte_port_ring_writer_ras_params {
+	/** Underlying single consumer ring that has to be pre-initialized. */
+	struct rte_ring *ring;
+
+	/** Recommended burst size to ring. The actual burst size can be bigger
+	or smaller than this value. */
+	uint32_t tx_burst_sz;
+};
+
+#define rte_port_ring_writer_ipv4_ras_params rte_port_ring_writer_ras_params
+
+#define rte_port_ring_writer_ipv6_ras_params rte_port_ring_writer_ras_params
+
+/** ring_writer_ipv4_ras port operations */
+extern struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops;
+
+/** ring_writer_ipv6_ras port operations */
+extern struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c
new file mode 100644
index 00000000..3b9d3d08
--- /dev/null
+++ b/lib/librte_port/rte_port_ring.c
@@ -0,0 +1,810 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_ring.h>
+#include <rte_malloc.h>
+
+#include "rte_port_ring.h"
+
+/*
+ * Port RING Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_RING_READER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_RING_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ring_reader {
+	struct rte_port_in_stats stats;
+
+	struct rte_ring *ring;
+};
+
+static void *
+rte_port_ring_reader_create_internal(void *params, int socket_id,
+	uint32_t is_multi)
+{
+	struct rte_port_ring_reader_params *conf =
+			(struct rte_port_ring_reader_params *) params;
+	struct rte_port_ring_reader *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->ring == NULL) ||
+		(conf->ring->cons.sc_dequeue && is_multi) ||
+		(!(conf->ring->cons.sc_dequeue) && !is_multi)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->ring = conf->ring;
+
+	return port;
+}
+
+static void *
+rte_port_ring_reader_create(void *params, int socket_id)
+{
+	return rte_port_ring_reader_create_internal(params, socket_id, 0);
+}
+
+static void *
+rte_port_ring_multi_reader_create(void *params, int socket_id)
+{
+	return rte_port_ring_reader_create_internal(params, socket_id, 1);
+}
+
+static int
+rte_port_ring_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port;
+	uint32_t nb_rx;
+
+	nb_rx = rte_ring_sc_dequeue_burst(p->ring, (void **) pkts, n_pkts);
+	RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx);
+
+	return nb_rx;
+}
+
+static int
+rte_port_ring_multi_reader_rx(void *port, struct rte_mbuf **pkts,
+	uint32_t n_pkts)
+{
+	struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port;
+	uint32_t nb_rx;
+
+	nb_rx = rte_ring_mc_dequeue_burst(p->ring, (void **) pkts, n_pkts);
+	RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx);
+
+	return nb_rx;
+}
+
+static int
+rte_port_ring_reader_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_ring_reader_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_ring_reader *p =
+		(struct rte_port_ring_reader *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port RING Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ring_writer {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	struct rte_ring *ring;
+	uint32_t tx_burst_sz;
+	uint32_t tx_buf_count;
+	uint64_t bsz_mask;
+	uint32_t is_multi;
+};
+
+static void *
+rte_port_ring_writer_create_internal(void *params, int socket_id,
+	uint32_t is_multi)
+{
+	struct rte_port_ring_writer_params *conf =
+			(struct rte_port_ring_writer_params *) params;
+	struct rte_port_ring_writer *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->ring == NULL) ||
+		(conf->ring->prod.sp_enqueue && is_multi) ||
+		(!(conf->ring->prod.sp_enqueue) && !is_multi) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->ring = conf->ring;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+	port->is_multi = is_multi;
+
+	return port;
+}
+
+static void *
+rte_port_ring_writer_create(void *params, int socket_id)
+{
+	return rte_port_ring_writer_create_internal(params, socket_id, 0);
+}
+
+static void *
+rte_port_ring_multi_writer_create(void *params, int socket_id)
+{
+	return rte_port_ring_writer_create_internal(params, socket_id, 1);
+}
+
+static inline void
+send_burst(struct rte_port_ring_writer *p)
+{
+	uint32_t nb_tx;
+
+	nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf,
+			p->tx_buf_count);
+
+	RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static inline void
+send_burst_mp(struct rte_port_ring_writer *p)
+{
+	uint32_t nb_tx;
+
+	nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf,
+			p->tx_buf_count);
+
+	RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_ring_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_multi_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst_mp(p);
+
+	return 0;
+}
+
+static inline int __attribute__((always_inline))
+rte_port_ring_writer_tx_bulk_internal(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask,
+		uint32_t is_multi)
+{
+	struct rte_port_ring_writer *p =
+		(struct rte_port_ring_writer *) port;
+
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+			((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t n_pkts_ok;
+
+		if (tx_buf_count) {
+			if (is_multi)
+				send_burst_mp(p);
+			else
+				send_burst(p);
+		}
+
+		RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
+		if (is_multi)
+			n_pkts_ok = rte_ring_mp_enqueue_burst(p->ring, (void **)pkts,
+				n_pkts);
+		else
+			n_pkts_ok = rte_ring_sp_enqueue_burst(p->ring, (void **)pkts,
+				n_pkts);
+
+		RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok);
+		for ( ; n_pkts_ok < n_pkts; n_pkts_ok++) {
+			struct rte_mbuf *pkt = pkts[n_pkts_ok];
+
+			rte_pktmbuf_free(pkt);
+		}
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+		p->tx_buf_count = tx_buf_count;
+		if (tx_buf_count >= p->tx_burst_sz) {
+			if (is_multi)
+				send_burst_mp(p);
+			else
+				send_burst(p);
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	return rte_port_ring_writer_tx_bulk_internal(port, pkts, pkts_mask, 0);
+}
+
+static int
+rte_port_ring_multi_writer_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	return rte_port_ring_writer_tx_bulk_internal(port, pkts, pkts_mask, 1);
+}
+
+static int
+rte_port_ring_writer_flush(void *port)
+{
+	struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_multi_writer_flush(void *port)
+{
+	struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst_mp(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_free(void *port)
+{
+	struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port;
+
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (p->is_multi)
+		rte_port_ring_multi_writer_flush(port);
+	else
+		rte_port_ring_writer_flush(port);
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_ring_writer *p =
+		(struct rte_port_ring_writer *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port RING Writer Nodrop
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_ring_writer_nodrop {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	struct rte_ring *ring;
+	uint32_t tx_burst_sz;
+	uint32_t tx_buf_count;
+	uint64_t bsz_mask;
+	uint64_t n_retries;
+	uint32_t is_multi;
+};
+
+static void *
+rte_port_ring_writer_nodrop_create_internal(void *params, int socket_id,
+	uint32_t is_multi)
+{
+	struct rte_port_ring_writer_nodrop_params *conf =
+			(struct rte_port_ring_writer_nodrop_params *) params;
+	struct rte_port_ring_writer_nodrop *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->ring == NULL) ||
+		(conf->ring->prod.sp_enqueue && is_multi) ||
+		(!(conf->ring->prod.sp_enqueue) && !is_multi) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->ring = conf->ring;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+	port->is_multi = is_multi;
+
+	/*
+	 * When n_retries is 0 it means that we should wait for every packet to
+	 * send no matter how many retries should it take. To limit number of
+	 * branches in fast path, we use UINT64_MAX instead of branching.
+	 */
+	port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries;
+
+	return port;
+}
+
+static void *
+rte_port_ring_writer_nodrop_create(void *params, int socket_id)
+{
+	return rte_port_ring_writer_nodrop_create_internal(params, socket_id, 0);
+}
+
+static void *
+rte_port_ring_multi_writer_nodrop_create(void *params, int socket_id)
+{
+	return rte_port_ring_writer_nodrop_create_internal(params, socket_id, 1);
+}
+
+static inline void
+send_burst_nodrop(struct rte_port_ring_writer_nodrop *p)
+{
+	uint32_t nb_tx = 0, i;
+
+	nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf,
+				p->tx_buf_count);
+
+	/* We sent all the packets in a first try */
+	if (nb_tx >= p->tx_buf_count) {
+		p->tx_buf_count = 0;
+		return;
+	}
+
+	for (i = 0; i < p->n_retries; i++) {
+		nb_tx += rte_ring_sp_enqueue_burst(p->ring,
+				(void **) (p->tx_buf + nb_tx), p->tx_buf_count - nb_tx);
+
+		/* We sent all the packets in more than one try */
+		if (nb_tx >= p->tx_buf_count) {
+			p->tx_buf_count = 0;
+			return;
+		}
+	}
+
+	/* We didn't send the packets in maximum allowed attempts */
+	RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static inline void
+send_burst_mp_nodrop(struct rte_port_ring_writer_nodrop *p)
+{
+	uint32_t nb_tx = 0, i;
+
+	nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf,
+				p->tx_buf_count);
+
+	/* We sent all the packets in a first try */
+	if (nb_tx >= p->tx_buf_count) {
+		p->tx_buf_count = 0;
+		return;
+	}
+
+	for (i = 0; i < p->n_retries; i++) {
+		nb_tx += rte_ring_mp_enqueue_burst(p->ring,
+				(void **) (p->tx_buf + nb_tx), p->tx_buf_count - nb_tx);
+
+		/* We sent all the packets in more than one try */
+		if (nb_tx >= p->tx_buf_count) {
+			p->tx_buf_count = 0;
+			return;
+		}
+	}
+
+	/* We didn't send the packets in maximum allowed attempts */
+	RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_ring_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ring_writer_nodrop *p =
+			(struct rte_port_ring_writer_nodrop *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_multi_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_ring_writer_nodrop *p =
+			(struct rte_port_ring_writer_nodrop *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst_mp_nodrop(p);
+
+	return 0;
+}
+
+static inline int __attribute__((always_inline))
+rte_port_ring_writer_nodrop_tx_bulk_internal(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask,
+		uint32_t is_multi)
+{
+	struct rte_port_ring_writer_nodrop *p =
+		(struct rte_port_ring_writer_nodrop *) port;
+
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+			((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t n_pkts_ok;
+
+		if (tx_buf_count) {
+			if (is_multi)
+				send_burst_mp_nodrop(p);
+			else
+				send_burst_nodrop(p);
+		}
+
+		RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
+		if (is_multi)
+			n_pkts_ok =
+				rte_ring_mp_enqueue_burst(p->ring, (void **)pkts, n_pkts);
+		else
+			n_pkts_ok =
+				rte_ring_sp_enqueue_burst(p->ring, (void **)pkts, n_pkts);
+
+		if (n_pkts_ok >= n_pkts)
+			return 0;
+
+		/*
+		 * If we didn't manage to send all packets in single burst, move
+		 * remaining packets to the buffer and call send burst.
+		 */
+		for (; n_pkts_ok < n_pkts; n_pkts_ok++) {
+			struct rte_mbuf *pkt = pkts[n_pkts_ok];
+
+			p->tx_buf[p->tx_buf_count++] = pkt;
+		}
+		if (is_multi)
+			send_burst_mp_nodrop(p);
+		else
+			send_burst_nodrop(p);
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+		p->tx_buf_count = tx_buf_count;
+		if (tx_buf_count >= p->tx_burst_sz) {
+			if (is_multi)
+				send_burst_mp_nodrop(p);
+			else
+				send_burst_nodrop(p);
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_nodrop_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	return
+		rte_port_ring_writer_nodrop_tx_bulk_internal(port, pkts, pkts_mask, 0);
+}
+
+static int
+rte_port_ring_multi_writer_nodrop_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	return
+		rte_port_ring_writer_nodrop_tx_bulk_internal(port, pkts, pkts_mask, 1);
+}
+
+static int
+rte_port_ring_writer_nodrop_flush(void *port)
+{
+	struct rte_port_ring_writer_nodrop *p =
+			(struct rte_port_ring_writer_nodrop *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_multi_writer_nodrop_flush(void *port)
+{
+	struct rte_port_ring_writer_nodrop *p =
+			(struct rte_port_ring_writer_nodrop *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst_mp_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_nodrop_free(void *port)
+{
+	struct rte_port_ring_writer_nodrop *p =
+			(struct rte_port_ring_writer_nodrop *) port;
+
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (p->is_multi)
+		rte_port_ring_multi_writer_nodrop_flush(port);
+	else
+		rte_port_ring_writer_nodrop_flush(port);
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_ring_writer_nodrop_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_ring_writer_nodrop *p =
+		(struct rte_port_ring_writer_nodrop *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_ring_reader_ops = {
+	.f_create = rte_port_ring_reader_create,
+	.f_free = rte_port_ring_reader_free,
+	.f_rx = rte_port_ring_reader_rx,
+	.f_stats = rte_port_ring_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ring_writer_ops = {
+	.f_create = rte_port_ring_writer_create,
+	.f_free = rte_port_ring_writer_free,
+	.f_tx = rte_port_ring_writer_tx,
+	.f_tx_bulk = rte_port_ring_writer_tx_bulk,
+	.f_flush = rte_port_ring_writer_flush,
+	.f_stats = rte_port_ring_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ring_writer_nodrop_ops = {
+	.f_create = rte_port_ring_writer_nodrop_create,
+	.f_free = rte_port_ring_writer_nodrop_free,
+	.f_tx = rte_port_ring_writer_nodrop_tx,
+	.f_tx_bulk = rte_port_ring_writer_nodrop_tx_bulk,
+	.f_flush = rte_port_ring_writer_nodrop_flush,
+	.f_stats = rte_port_ring_writer_nodrop_stats_read,
+};
+
+struct rte_port_in_ops rte_port_ring_multi_reader_ops = {
+	.f_create = rte_port_ring_multi_reader_create,
+	.f_free = rte_port_ring_reader_free,
+	.f_rx = rte_port_ring_multi_reader_rx,
+	.f_stats = rte_port_ring_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ring_multi_writer_ops = {
+	.f_create = rte_port_ring_multi_writer_create,
+	.f_free = rte_port_ring_writer_free,
+	.f_tx = rte_port_ring_multi_writer_tx,
+	.f_tx_bulk = rte_port_ring_multi_writer_tx_bulk,
+	.f_flush = rte_port_ring_multi_writer_flush,
+	.f_stats = rte_port_ring_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops = {
+	.f_create = rte_port_ring_multi_writer_nodrop_create,
+	.f_free = rte_port_ring_writer_nodrop_free,
+	.f_tx = rte_port_ring_multi_writer_nodrop_tx,
+	.f_tx_bulk = rte_port_ring_multi_writer_nodrop_tx_bulk,
+	.f_flush = rte_port_ring_multi_writer_nodrop_flush,
+	.f_stats = rte_port_ring_writer_nodrop_stats_read,
+};
diff --git a/lib/librte_port/rte_port_ring.h b/lib/librte_port/rte_port_ring.h
new file mode 100644
index 00000000..de377d28
--- /dev/null
+++ b/lib/librte_port/rte_port_ring.h
@@ -0,0 +1,123 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_RING_H__
+#define __INCLUDE_RTE_PORT_RING_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port Ring
+ *
+ * ring_reader:
+ *      input port built on top of pre-initialized single consumer ring
+ * ring_writer:
+ *      output port built on top of pre-initialized single producer ring
+ * ring_multi_reader:
+ *      input port built on top of pre-initialized multi consumers ring
+ * ring_multi_writer:
+ *      output port built on top of pre-initialized multi producers ring
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_ring.h>
+
+#include "rte_port.h"
+
+/** ring_reader port parameters */
+struct rte_port_ring_reader_params {
+	/** Underlying consumer ring that has to be pre-initialized */
+	struct rte_ring *ring;
+};
+
+/** ring_reader port operations */
+extern struct rte_port_in_ops rte_port_ring_reader_ops;
+
+/** ring_writer port parameters */
+struct rte_port_ring_writer_params {
+	/** Underlying producer ring that has to be pre-initialized */
+	struct rte_ring *ring;
+
+	/** Recommended burst size to ring. The actual burst size can be
+		bigger or smaller than this value. */
+	uint32_t tx_burst_sz;
+};
+
+/** ring_writer port operations */
+extern struct rte_port_out_ops rte_port_ring_writer_ops;
+
+/** ring_writer_nodrop port parameters */
+struct rte_port_ring_writer_nodrop_params {
+	/** Underlying producer ring that has to be pre-initialized */
+	struct rte_ring *ring;
+
+	/** Recommended burst size to ring. The actual burst size can be
+		bigger or smaller than this value. */
+	uint32_t tx_burst_sz;
+
+	/** Maximum number of retries, 0 for no limit */
+	uint32_t n_retries;
+};
+
+/** ring_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_ring_writer_nodrop_ops;
+
+/** ring_multi_reader port parameters */
+#define rte_port_ring_multi_reader_params rte_port_ring_reader_params
+
+/** ring_multi_reader port operations */
+extern struct rte_port_in_ops rte_port_ring_multi_reader_ops;
+
+/** ring_multi_writer port parameters */
+#define rte_port_ring_multi_writer_params rte_port_ring_writer_params
+
+/** ring_multi_writer port operations */
+extern struct rte_port_out_ops rte_port_ring_multi_writer_ops;
+
+/** ring_multi_writer_nodrop port parameters */
+#define rte_port_ring_multi_writer_nodrop_params \
+	rte_port_ring_writer_nodrop_params
+
+/** ring_multi_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_sched.c b/lib/librte_port/rte_port_sched.c
new file mode 100644
index 00000000..25217d62
--- /dev/null
+++ b/lib/librte_port/rte_port_sched.c
@@ -0,0 +1,323 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+
+#include "rte_port_sched.h"
+
+/*
+ * Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SCHED_READER_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_SCHED_READER_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_SCHED_READER_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SCHED_READER_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_sched_reader {
+	struct rte_port_in_stats stats;
+
+	struct rte_sched_port *sched;
+};
+
+static void *
+rte_port_sched_reader_create(void *params, int socket_id)
+{
+	struct rte_port_sched_reader_params *conf =
+			(struct rte_port_sched_reader_params *) params;
+	struct rte_port_sched_reader *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+	    (conf->sched == NULL)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->sched = conf->sched;
+
+	return port;
+}
+
+static int
+rte_port_sched_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_sched_reader *p = (struct rte_port_sched_reader *) port;
+	uint32_t nb_rx;
+
+	nb_rx = rte_sched_port_dequeue(p->sched, pkts, n_pkts);
+	RTE_PORT_SCHED_READER_PKTS_IN_ADD(p, nb_rx);
+
+	return nb_rx;
+}
+
+static int
+rte_port_sched_reader_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_sched_reader_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_sched_reader *p =
+		(struct rte_port_sched_reader *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_sched_writer {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	struct rte_sched_port *sched;
+	uint32_t tx_burst_sz;
+	uint32_t tx_buf_count;
+	uint64_t bsz_mask;
+};
+
+static void *
+rte_port_sched_writer_create(void *params, int socket_id)
+{
+	struct rte_port_sched_writer_params *conf =
+			(struct rte_port_sched_writer_params *) params;
+	struct rte_port_sched_writer *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+	    (conf->sched == NULL) ||
+	    (conf->tx_burst_sz == 0) ||
+	    (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->sched = conf->sched;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+	return port;
+}
+
+static int
+rte_port_sched_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz) {
+		__rte_unused uint32_t nb_tx;
+
+		nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, p->tx_buf_count);
+		RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+		p->tx_buf_count = 0;
+	}
+
+	return 0;
+}
+
+static int
+rte_port_sched_writer_tx_bulk(void *port,
+		struct rte_mbuf **pkts,
+		uint64_t pkts_mask)
+{
+	struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port;
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+			((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		__rte_unused uint32_t nb_tx;
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+
+		if (tx_buf_count) {
+			nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf,
+				tx_buf_count);
+			RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, tx_buf_count - nb_tx);
+			p->tx_buf_count = 0;
+		}
+
+		nb_tx = rte_sched_port_enqueue(p->sched, pkts, n_pkts);
+		RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - nb_tx);
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+		p->tx_buf_count = tx_buf_count;
+
+		if (tx_buf_count >= p->tx_burst_sz) {
+			__rte_unused uint32_t nb_tx;
+
+			nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf,
+				tx_buf_count);
+			RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, tx_buf_count - nb_tx);
+			p->tx_buf_count = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_port_sched_writer_flush(void *port)
+{
+	struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port;
+
+	if (p->tx_buf_count) {
+		__rte_unused uint32_t nb_tx;
+
+		nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, p->tx_buf_count);
+		RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+		p->tx_buf_count = 0;
+	}
+
+	return 0;
+}
+
+static int
+rte_port_sched_writer_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_port_sched_writer_flush(port);
+	rte_free(port);
+
+	return 0;
+}
+
+static int
+rte_port_sched_writer_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_sched_writer *p =
+		(struct rte_port_sched_writer *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_sched_reader_ops = {
+	.f_create = rte_port_sched_reader_create,
+	.f_free = rte_port_sched_reader_free,
+	.f_rx = rte_port_sched_reader_rx,
+	.f_stats = rte_port_sched_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_sched_writer_ops = {
+	.f_create = rte_port_sched_writer_create,
+	.f_free = rte_port_sched_writer_free,
+	.f_tx = rte_port_sched_writer_tx,
+	.f_tx_bulk = rte_port_sched_writer_tx_bulk,
+	.f_flush = rte_port_sched_writer_flush,
+	.f_stats = rte_port_sched_writer_stats_read,
+};
diff --git a/lib/librte_port/rte_port_sched.h b/lib/librte_port/rte_port_sched.h
new file mode 100644
index 00000000..555415ab
--- /dev/null
+++ b/lib/librte_port/rte_port_sched.h
@@ -0,0 +1,82 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_SCHED_H__
+#define __INCLUDE_RTE_PORT_SCHED_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port Hierarchical Scheduler
+ *
+ * sched_reader: input port built on top of pre-initialized rte_sched_port
+ * sched_writer: output port built on top of pre-initialized rte_sched_port
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_sched.h>
+
+#include "rte_port.h"
+
+/** sched_reader port parameters */
+struct rte_port_sched_reader_params {
+	/** Underlying pre-initialized rte_sched_port */
+	struct rte_sched_port *sched;
+};
+
+/** sched_reader port operations */
+extern struct rte_port_in_ops rte_port_sched_reader_ops;
+
+/** sched_writer port parameters */
+struct rte_port_sched_writer_params {
+	/** Underlying pre-initialized rte_sched_port */
+	struct rte_sched_port *sched;
+
+	/** Recommended burst size. The actual burst size can be bigger or
+	smaller than this value. */
+	uint32_t tx_burst_sz;
+};
+
+/** sched_writer port operations */
+extern struct rte_port_out_ops rte_port_sched_writer_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_source_sink.c b/lib/librte_port/rte_port_source_sink.c
new file mode 100644
index 00000000..056c9756
--- /dev/null
+++ b/lib/librte_port/rte_port_source_sink.c
@@ -0,0 +1,667 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <string.h>
+
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+
+#ifdef RTE_NEXT_ABI
+
+#ifdef RTE_PORT_PCAP
+#include <rte_ether.h>
+#include <pcap.h>
+#endif
+
+#else
+#undef RTE_PORT_PCAP
+#endif
+
+#include "rte_port_source_sink.h"
+
+/*
+ * Port SOURCE
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_SOURCE_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SOURCE_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_source {
+	struct rte_port_in_stats stats;
+
+	struct rte_mempool *mempool;
+
+	/* PCAP buffers and indices */
+	uint8_t **pkts;
+	uint8_t *pkt_buff;
+	uint32_t *pkt_len;
+	uint32_t n_pkts;
+	uint32_t pkt_index;
+};
+
+#ifdef RTE_NEXT_ABI
+
+#ifdef RTE_PORT_PCAP
+
+static int
+pcap_source_load(struct rte_port_source *port,
+		const char *file_name,
+		uint32_t n_bytes_per_pkt,
+		int socket_id)
+{
+	uint32_t n_pkts = 0;
+	uint32_t i;
+	uint32_t *pkt_len_aligns = NULL;
+	size_t total_buff_len = 0;
+	pcap_t *pcap_handle;
+	char pcap_errbuf[PCAP_ERRBUF_SIZE];
+	uint32_t max_len;
+	struct pcap_pkthdr pcap_hdr;
+	const uint8_t *pkt;
+	uint8_t *buff = NULL;
+	uint32_t pktmbuf_maxlen = (uint32_t)
+			(rte_pktmbuf_data_room_size(port->mempool) -
+			RTE_PKTMBUF_HEADROOM);
+
+	if (n_bytes_per_pkt == 0)
+		max_len = pktmbuf_maxlen;
+	else
+		max_len = RTE_MIN(n_bytes_per_pkt, pktmbuf_maxlen);
+
+	/* first time open, get packet number */
+	pcap_handle = pcap_open_offline(file_name, pcap_errbuf);
+	if (pcap_handle == NULL) {
+		RTE_LOG(ERR, PORT, "Failed to open pcap file "
+			"'%s' for reading\n", file_name);
+		goto error_exit;
+	}
+
+	while ((pkt = pcap_next(pcap_handle, &pcap_hdr)) != NULL)
+		n_pkts++;
+
+	pcap_close(pcap_handle);
+
+	port->pkt_len = rte_zmalloc_socket("PCAP",
+		(sizeof(*port->pkt_len) * n_pkts), 0, socket_id);
+	if (port->pkt_len == NULL) {
+		RTE_LOG(ERR, PORT, "No enough memory\n");
+		goto error_exit;
+	}
+
+	pkt_len_aligns = rte_malloc("PCAP",
+		(sizeof(*pkt_len_aligns) * n_pkts), 0);
+	if (pkt_len_aligns == NULL) {
+		RTE_LOG(ERR, PORT, "No enough memory\n");
+		goto error_exit;
+	}
+
+	port->pkts = rte_zmalloc_socket("PCAP",
+		(sizeof(*port->pkts) * n_pkts), 0, socket_id);
+	if (port->pkts == NULL) {
+		RTE_LOG(ERR, PORT, "No enough memory\n");
+		goto error_exit;
+	}
+
+	/* open 2nd time, get pkt_len */
+	pcap_handle = pcap_open_offline(file_name, pcap_errbuf);
+	if (pcap_handle == NULL) {
+		RTE_LOG(ERR, PORT, "Failed to open pcap file "
+			"'%s' for reading\n", file_name);
+		goto error_exit;
+	}
+
+	for (i = 0; i < n_pkts; i++) {
+		pkt = pcap_next(pcap_handle, &pcap_hdr);
+		port->pkt_len[i] = RTE_MIN(max_len, pcap_hdr.len);
+		pkt_len_aligns[i] = RTE_CACHE_LINE_ROUNDUP(
+			port->pkt_len[i]);
+		total_buff_len += pkt_len_aligns[i];
+	}
+
+	pcap_close(pcap_handle);
+
+	/* allocate a big trunk of data for pcap file load */
+	buff = rte_zmalloc_socket("PCAP",
+		total_buff_len, 0, socket_id);
+	if (buff == NULL) {
+		RTE_LOG(ERR, PORT, "No enough memory\n");
+		goto error_exit;
+	}
+
+	port->pkt_buff = buff;
+
+	/* open file one last time to copy the pkt content */
+	pcap_handle = pcap_open_offline(file_name, pcap_errbuf);
+	if (pcap_handle == NULL) {
+		RTE_LOG(ERR, PORT, "Failed to open pcap file "
+			"'%s' for reading\n", file_name);
+		goto error_exit;
+	}
+
+	for (i = 0; i < n_pkts; i++) {
+		pkt = pcap_next(pcap_handle, &pcap_hdr);
+		rte_memcpy(buff, pkt, port->pkt_len[i]);
+		port->pkts[i] = buff;
+		buff += pkt_len_aligns[i];
+	}
+
+	pcap_close(pcap_handle);
+
+	port->n_pkts = n_pkts;
+
+	rte_free(pkt_len_aligns);
+
+	RTE_LOG(INFO, PORT, "Successfully load pcap file "
+		"'%s' with %u pkts\n",
+		file_name, port->n_pkts);
+
+	return 0;
+
+error_exit:
+	if (pkt_len_aligns)
+		rte_free(pkt_len_aligns);
+	if (port->pkt_len)
+		rte_free(port->pkt_len);
+	if (port->pkts)
+		rte_free(port->pkts);
+	if (port->pkt_buff)
+		rte_free(port->pkt_buff);
+
+	return -1;
+}
+
+#define PCAP_SOURCE_LOAD(port, file_name, n_bytes, socket_id)	\
+	pcap_source_load(port, file_name, n_bytes, socket_id)
+
+#else /* RTE_PORT_PCAP */
+
+#define PCAP_SOURCE_LOAD(port, file_name, n_bytes, socket_id)	\
+({								\
+	int _ret = 0;						\
+								\
+	if (file_name) {					\
+		RTE_LOG(ERR, PORT, "Source port field "		\
+			"\"file_name\" is not NULL.\n");	\
+		_ret = -1;					\
+	}							\
+								\
+	_ret;							\
+})
+
+#endif /* RTE_PORT_PCAP */
+
+#endif /* RTE_NEXT_ABI */
+
+static void *
+rte_port_source_create(void *params, int socket_id)
+{
+	struct rte_port_source_params *p =
+			(struct rte_port_source_params *) params;
+	struct rte_port_source *port;
+
+	/* Check input arguments*/
+	if ((p == NULL) || (p->mempool == NULL)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->mempool = (struct rte_mempool *) p->mempool;
+
+#ifdef RTE_NEXT_ABI
+
+	if (p->file_name) {
+		int status = PCAP_SOURCE_LOAD(port, p->file_name,
+			p->n_bytes_per_pkt, socket_id);
+
+		if (status < 0) {
+			rte_free(port);
+			port = NULL;
+		}
+	}
+
+#endif
+
+	return port;
+}
+
+static int
+rte_port_source_free(void *port)
+{
+	struct rte_port_source *p =
+			(struct rte_port_source *)port;
+
+	/* Check input parameters */
+	if (p == NULL)
+		return 0;
+
+	if (p->pkt_len)
+		rte_free(p->pkt_len);
+	if (p->pkts)
+		rte_free(p->pkts);
+	if (p->pkt_buff)
+		rte_free(p->pkt_buff);
+
+	rte_free(p);
+
+	return 0;
+}
+
+static int
+rte_port_source_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_source *p = (struct rte_port_source *) port;
+	uint32_t i;
+
+	if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0)
+		return 0;
+
+	for (i = 0; i < n_pkts; i++) {
+		rte_mbuf_refcnt_set(pkts[i], 1);
+		rte_pktmbuf_reset(pkts[i]);
+	}
+
+	if (p->pkt_buff != NULL) {
+		for (i = 0; i < n_pkts; i++) {
+			uint8_t *pkt_data = rte_pktmbuf_mtod(pkts[i],
+				uint8_t *);
+
+			rte_memcpy(pkt_data, p->pkts[p->pkt_index],
+					p->pkt_len[p->pkt_index]);
+			pkts[i]->data_len = p->pkt_len[p->pkt_index];
+			pkts[i]->pkt_len = pkts[i]->data_len;
+
+			p->pkt_index++;
+			if (p->pkt_index >= p->n_pkts)
+				p->pkt_index = 0;
+		}
+	}
+
+	RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(p, n_pkts);
+
+	return n_pkts;
+}
+
+static int
+rte_port_source_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_source *p =
+		(struct rte_port_source *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port SINK
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SINK_STATS_PKTS_IN_ADD(port, val) \
+	(port->stats.n_pkts_in += val)
+#define RTE_PORT_SINK_STATS_PKTS_DROP_ADD(port, val) \
+	(port->stats.n_pkts_drop += val)
+
+#else
+
+#define RTE_PORT_SINK_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SINK_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_sink {
+	struct rte_port_out_stats stats;
+
+	/* PCAP dumper handle and pkts number */
+	void *dumper;
+	uint32_t max_pkts;
+	uint32_t pkt_index;
+	uint32_t dump_finish;
+};
+
+#ifdef RTE_PORT_PCAP
+
+static int
+pcap_sink_open(struct rte_port_sink *port,
+	const char *file_name,
+	uint32_t max_n_pkts)
+{
+	pcap_t *tx_pcap;
+	pcap_dumper_t *pcap_dumper;
+
+	/** Open a dead pcap handler for opening dumper file */
+	tx_pcap = pcap_open_dead(DLT_EN10MB, 65535);
+	if (tx_pcap == NULL) {
+		RTE_LOG(ERR, PORT, "Cannot open pcap dead handler\n");
+		return -1;
+	}
+
+	/* The dumper is created using the previous pcap_t reference */
+	pcap_dumper = pcap_dump_open(tx_pcap, file_name);
+	if (pcap_dumper == NULL) {
+		RTE_LOG(ERR, PORT, "Failed to open pcap file "
+			"\"%s\" for writing\n", file_name);
+		return -1;
+	}
+
+	port->dumper = pcap_dumper;
+	port->max_pkts = max_n_pkts;
+	port->pkt_index = 0;
+	port->dump_finish = 0;
+
+	RTE_LOG(INFO, PORT, "Ready to dump packets to file \"%s\"\n",
+		file_name);
+
+	return 0;
+}
+
+static void
+pcap_sink_write_pkt(struct rte_port_sink *port, struct rte_mbuf *mbuf)
+{
+	uint8_t *pcap_dumper = (uint8_t *)(port->dumper);
+	struct pcap_pkthdr pcap_hdr;
+	uint8_t jumbo_pkt_buf[ETHER_MAX_JUMBO_FRAME_LEN];
+	uint8_t *pkt;
+
+	/* Maximum num packets already reached */
+	if (port->dump_finish)
+		return;
+
+	pkt = rte_pktmbuf_mtod(mbuf, uint8_t *);
+
+	pcap_hdr.len = mbuf->pkt_len;
+	pcap_hdr.caplen = pcap_hdr.len;
+	gettimeofday(&(pcap_hdr.ts), NULL);
+
+	if (mbuf->nb_segs > 1) {
+		struct rte_mbuf *jumbo_mbuf;
+		uint32_t pkt_index = 0;
+
+		/* if packet size longer than ETHER_MAX_JUMBO_FRAME_LEN,
+		 * ignore it.
+		 */
+		if (mbuf->pkt_len > ETHER_MAX_JUMBO_FRAME_LEN)
+			return;
+
+		for (jumbo_mbuf = mbuf; jumbo_mbuf != NULL;
+				jumbo_mbuf = jumbo_mbuf->next) {
+			rte_memcpy(&jumbo_pkt_buf[pkt_index],
+				rte_pktmbuf_mtod(jumbo_mbuf, uint8_t *),
+				jumbo_mbuf->data_len);
+			pkt_index += jumbo_mbuf->data_len;
+		}
+
+		jumbo_pkt_buf[pkt_index] = '\0';
+
+		pkt = jumbo_pkt_buf;
+	}
+
+	pcap_dump(pcap_dumper, &pcap_hdr, pkt);
+
+	port->pkt_index++;
+
+	if ((port->max_pkts != 0) && (port->pkt_index >= port->max_pkts)) {
+		port->dump_finish = 1;
+		RTE_LOG(INFO, PORT, "Dumped %u packets to file\n",
+				port->pkt_index);
+	}
+
+}
+
+#define PCAP_SINK_OPEN(port, file_name, max_n_pkts)		\
+	pcap_sink_open(port, file_name, max_n_pkts)
+
+#define PCAP_SINK_WRITE_PKT(port, mbuf)				\
+	pcap_sink_write_pkt(port, mbuf)
+
+#define PCAP_SINK_FLUSH_PKT(dumper)				\
+do {								\
+	if (dumper)						\
+		pcap_dump_flush((pcap_dumper_t *)dumper);	\
+} while (0)
+
+#define PCAP_SINK_CLOSE(dumper)					\
+do {								\
+	if (dumper)						\
+		pcap_dump_close((pcap_dumper_t *)dumper);	\
+} while (0)
+
+#else
+
+#define PCAP_SINK_OPEN(port, file_name, max_n_pkts)		\
+({								\
+	int _ret = 0;						\
+								\
+	if (file_name) {					\
+		RTE_LOG(ERR, PORT, "Sink port field "		\
+			"\"file_name\" is not NULL.\n");	\
+		_ret = -1;					\
+	}							\
+								\
+	_ret;							\
+})
+
+#define PCAP_SINK_WRITE_PKT(port, mbuf) {}
+
+#define PCAP_SINK_FLUSH_PKT(dumper)
+
+#define PCAP_SINK_CLOSE(dumper)
+
+#endif
+
+static void *
+rte_port_sink_create(void *params, int socket_id)
+{
+	struct rte_port_sink *port;
+	struct rte_port_sink_params *p = params;
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	if (!p)
+		return port;
+
+	if (p->file_name) {
+		int status = PCAP_SINK_OPEN(port, p->file_name,
+			p->max_n_pkts);
+
+		if (status < 0) {
+			rte_free(port);
+			port = NULL;
+		}
+	}
+
+	return port;
+}
+
+static int
+rte_port_sink_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_sink *p = (struct rte_port_sink *) port;
+
+	RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1);
+	if (p->dumper != NULL)
+		PCAP_SINK_WRITE_PKT(p, pkt);
+	rte_pktmbuf_free(pkt);
+	RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, 1);
+
+	return 0;
+}
+
+static int
+rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_sink *p = (struct rte_port_sink *) port;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, n_pkts);
+		RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, n_pkts);
+
+		if (p->dumper) {
+			for (i = 0; i < n_pkts; i++)
+				PCAP_SINK_WRITE_PKT(p, pkts[i]);
+		}
+
+		for (i = 0; i < n_pkts; i++) {
+			struct rte_mbuf *pkt = pkts[i];
+
+			rte_pktmbuf_free(pkt);
+		}
+
+	} else {
+		if (p->dumper) {
+			uint64_t dump_pkts_mask = pkts_mask;
+			uint32_t pkt_index;
+
+			for ( ; dump_pkts_mask; ) {
+				pkt_index = __builtin_ctzll(
+					dump_pkts_mask);
+				PCAP_SINK_WRITE_PKT(p, pkts[pkt_index]);
+				dump_pkts_mask &= ~(1LLU << pkt_index);
+			}
+		}
+
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1);
+			RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, 1);
+			rte_pktmbuf_free(pkt);
+			pkts_mask &= ~pkt_mask;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_port_sink_flush(void *port)
+{
+	struct rte_port_sink *p =
+			(struct rte_port_sink *)port;
+
+	if (p == NULL)
+		return 0;
+
+	PCAP_SINK_FLUSH_PKT(p->dumper);
+
+	return 0;
+}
+
+static int
+rte_port_sink_free(void *port)
+{
+	struct rte_port_sink *p =
+			(struct rte_port_sink *)port;
+
+	if (p == NULL)
+		return 0;
+
+	PCAP_SINK_CLOSE(p->dumper);
+
+	rte_free(p);
+
+	return 0;
+}
+
+static int
+rte_port_sink_stats_read(void *port, struct rte_port_out_stats *stats,
+		int clear)
+{
+	struct rte_port_sink *p =
+		(struct rte_port_sink *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_source_ops = {
+	.f_create = rte_port_source_create,
+	.f_free = rte_port_source_free,
+	.f_rx = rte_port_source_rx,
+	.f_stats = rte_port_source_stats_read,
+};
+
+struct rte_port_out_ops rte_port_sink_ops = {
+	.f_create = rte_port_sink_create,
+	.f_free = rte_port_sink_free,
+	.f_tx = rte_port_sink_tx,
+	.f_tx_bulk = rte_port_sink_tx_bulk,
+	.f_flush = rte_port_sink_flush,
+	.f_stats = rte_port_sink_stats_read,
+};
diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h
new file mode 100644
index 00000000..917abe4f
--- /dev/null
+++ b/lib/librte_port/rte_port_source_sink.h
@@ -0,0 +1,90 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__
+#define __INCLUDE_RTE_PORT_SOURCE_SINK_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port Source/Sink
+ *
+ * source: input port that can be used to generate packets
+ * sink: output port that drops all packets written to it
+ *
+ ***/
+
+#include "rte_port.h"
+
+/** source port parameters */
+struct rte_port_source_params {
+	/** Pre-initialized buffer pool */
+	struct rte_mempool *mempool;
+#ifdef RTE_NEXT_ABI
+
+	/** The full path of the pcap file to read packets from */
+	char *file_name;
+	/** The number of bytes to be read from each packet in the
+	 *  pcap file. If this value is 0, the whole packet is read;
+	 *  if it is bigger than packet size, the generated packets
+	 *  will contain the whole packet */
+	uint32_t n_bytes_per_pkt;
+
+#endif
+};
+
+/** source port operations */
+extern struct rte_port_in_ops rte_port_source_ops;
+
+/** sink port parameters */
+struct rte_port_sink_params {
+	/** The full path of the pcap file to write the packets to */
+	char *file_name;
+	/** The maximum number of packets write to the pcap file.
+	 *  If this value is 0, the "infinite" write will be carried
+	 *  out.
+	 */
+	uint32_t max_n_pkts;
+};
+
+/** sink port operations */
+extern struct rte_port_out_ops rte_port_sink_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map
new file mode 100644
index 00000000..7a0b34d0
--- /dev/null
+++ b/lib/librte_port/rte_port_version.map
@@ -0,0 +1,37 @@
+DPDK_2.0 {
+	global:
+
+	rte_port_ethdev_reader_ops;
+	rte_port_ethdev_writer_ops;
+	rte_port_ring_reader_ipv4_frag_ops;
+	rte_port_ring_reader_ops;
+	rte_port_ring_reader_ops;
+	rte_port_ring_writer_ipv4_ras_ops;
+	rte_port_ring_writer_ops;
+	rte_port_ring_writer_ops;
+	rte_port_sched_reader_ops;
+	rte_port_sched_writer_ops;
+	rte_port_sink_ops;
+	rte_port_source_ops;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_port_ethdev_writer_nodrop_ops;
+	rte_port_ring_reader_ipv6_frag_ops;
+	rte_port_ring_writer_ipv6_ras_ops;
+	rte_port_ring_writer_nodrop_ops;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+	global:
+
+	rte_port_ring_multi_reader_ops;
+	rte_port_ring_multi_writer_ops;
+	rte_port_ring_multi_writer_nodrop_ops;
+
+} DPDK_2.1;
diff --git a/lib/librte_power/Makefile b/lib/librte_power/Makefile
new file mode 100644
index 00000000..cee95cd8
--- /dev/null
+++ b/lib/librte_power/Makefile
@@ -0,0 +1,53 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_power.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
+
+EXPORT_MAP := rte_power_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_POWER) := rte_power.c rte_power_acpi_cpufreq.c
+SRCS-$(CONFIG_RTE_LIBRTE_POWER) += rte_power_kvm_vm.c guest_channel.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_POWER) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h
new file mode 100644
index 00000000..de6d9269
--- /dev/null
+++ b/lib/librte_power/channel_commands.h
@@ -0,0 +1,74 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CHANNEL_COMMANDS_H_
+#define CHANNEL_COMMANDS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* Maximum number of CPUs */
+#define CHANNEL_CMDS_MAX_CPUS        64
+#if CHANNEL_CMDS_MAX_CPUS > 64
+#error Maximum number of cores is 64, overflow is guaranteed to \
+	cause problems with VM Power Management
+#endif
+
+/* Maximum number of channels per VM */
+#define CHANNEL_CMDS_MAX_VM_CHANNELS 64
+
+/* Valid Commands */
+#define CPU_POWER               1
+#define CPU_POWER_CONNECT       2
+
+/* CPU Power Command Scaling */
+#define CPU_POWER_SCALE_UP      1
+#define CPU_POWER_SCALE_DOWN    2
+#define CPU_POWER_SCALE_MAX     3
+#define CPU_POWER_SCALE_MIN     4
+
+struct channel_packet {
+	uint64_t resource_id; /**< core_num, device */
+	uint32_t unit;        /**< scale down/up/min/max */
+	uint32_t command;     /**< Power, IO, etc */
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CHANNEL_COMMANDS_H_ */
diff --git a/lib/librte_power/guest_channel.c b/lib/librte_power/guest_channel.c
new file mode 100644
index 00000000..d6b6d0aa
--- /dev/null
+++ b/lib/librte_power/guest_channel.c
@@ -0,0 +1,161 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+
+
+#include <rte_log.h>
+
+#include "guest_channel.h"
+#include "channel_commands.h"
+
+#define RTE_LOGTYPE_GUEST_CHANNEL RTE_LOGTYPE_USER1
+
+static int global_fds[RTE_MAX_LCORE];
+
+int
+guest_channel_host_connect(const char *path, unsigned lcore_id)
+{
+	int flags, ret;
+	struct channel_packet pkt;
+	char fd_path[PATH_MAX];
+	int fd = -1;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+	/* check if path is already open */
+	if (global_fds[lcore_id] != 0) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is already open with fd %d\n",
+				lcore_id, global_fds[lcore_id]);
+		return -1;
+	}
+
+	snprintf(fd_path, PATH_MAX, "%s.%u", path, lcore_id);
+	RTE_LOG(INFO, GUEST_CHANNEL, "Opening channel '%s' for lcore %u\n",
+			fd_path, lcore_id);
+	fd = open(fd_path, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Unable to to connect to '%s' with error "
+				"%s\n", fd_path, strerror(errno));
+		return -1;
+	}
+
+	flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Failed on fcntl get flags for file %s\n",
+				fd_path);
+		goto error;
+	}
+
+	flags |= O_NONBLOCK;
+	if (fcntl(fd, F_SETFL, flags) < 0) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Failed on setting non-blocking mode for "
+				"file %s", fd_path);
+		goto error;
+	}
+	/* QEMU needs a delay after connection */
+	sleep(1);
+
+	/* Send a test packet, this command is ignored by the host, but a successful
+	 * send indicates that the host endpoint is monitoring.
+	 */
+	pkt.command = CPU_POWER_CONNECT;
+	global_fds[lcore_id] = fd;
+	ret = guest_channel_send_msg(&pkt, lcore_id);
+	if (ret != 0) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Error on channel '%s' communications "
+				"test: %s\n", fd_path, strerror(ret));
+		goto error;
+	}
+	RTE_LOG(INFO, GUEST_CHANNEL, "Channel '%s' is now connected\n", fd_path);
+	return 0;
+error:
+	close(fd);
+	global_fds[lcore_id] = 0;
+	return -1;
+}
+
+int
+guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id)
+{
+	int ret, buffer_len = sizeof(*pkt);
+	void *buffer = pkt;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return -1;
+	}
+
+	if (global_fds[lcore_id] == 0) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Channel is not connected\n");
+		return -1;
+	}
+	while (buffer_len > 0) {
+		ret = write(global_fds[lcore_id], buffer, buffer_len);
+		if (ret == buffer_len)
+			return 0;
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return errno;
+		}
+		buffer = (char *)buffer + ret;
+		buffer_len -= ret;
+	}
+	return 0;
+}
+
+void
+guest_channel_host_disconnect(unsigned lcore_id)
+{
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n",
+				lcore_id, RTE_MAX_LCORE-1);
+		return;
+	}
+	if (global_fds[lcore_id] == 0)
+		return;
+	close(global_fds[lcore_id]);
+	global_fds[lcore_id] = 0;
+}
diff --git a/lib/librte_power/guest_channel.h b/lib/librte_power/guest_channel.h
new file mode 100644
index 00000000..9e18af52
--- /dev/null
+++ b/lib/librte_power/guest_channel.h
@@ -0,0 +1,89 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _GUEST_CHANNEL_H
+#define _GUEST_CHANNEL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <channel_commands.h>
+
+/**
+ * Connect to the Virtio-Serial VM end-point located in path. It is
+ * thread safe for unique lcore_ids. This function must be only called once from
+ * each lcore.
+ *
+ * @param path
+ *  The path to the serial device on the filesystem
+ * @param lcore_id
+ *  lcore_id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int guest_channel_host_connect(const char *path, unsigned lcore_id);
+
+/**
+ * Disconnect from an already connected Virtio-Serial Endpoint.
+ *
+ *
+ * @param lcore_id
+ *  lcore_id.
+ *
+ */
+void guest_channel_host_disconnect(unsigned lcore_id);
+
+/**
+ * Send a message contained in pkt over the Virtio-Serial to the host endpoint.
+ *
+ * @param pkt
+ *  Pointer to a populated struct guest_agent_pkt
+ *
+ * @param lcore_id
+ *  lcore_id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on channel not connected.
+ *  - errno on write to channel error.
+ */
+int guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
new file mode 100644
index 00000000..998ed1c9
--- /dev/null
+++ b/lib/librte_power/rte_power.c
@@ -0,0 +1,143 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_atomic.h>
+
+#include "rte_power.h"
+#include "rte_power_acpi_cpufreq.h"
+#include "rte_power_kvm_vm.h"
+#include "rte_power_common.h"
+
+enum power_management_env global_default_env = PM_ENV_NOT_SET;
+
+volatile uint32_t global_env_cfg_status = 0;
+
+/* function pointers */
+rte_power_freqs_t rte_power_freqs  = NULL;
+rte_power_get_freq_t rte_power_get_freq = NULL;
+rte_power_set_freq_t rte_power_set_freq = NULL;
+rte_power_freq_change_t rte_power_freq_up = NULL;
+rte_power_freq_change_t rte_power_freq_down = NULL;
+rte_power_freq_change_t rte_power_freq_max = NULL;
+rte_power_freq_change_t rte_power_freq_min = NULL;
+
+int
+rte_power_set_env(enum power_management_env env)
+{
+	if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
+		return 0;
+	}
+	if (env == PM_ENV_ACPI_CPUFREQ) {
+		rte_power_freqs = rte_power_acpi_cpufreq_freqs;
+		rte_power_get_freq = rte_power_acpi_cpufreq_get_freq;
+		rte_power_set_freq = rte_power_acpi_cpufreq_set_freq;
+		rte_power_freq_up = rte_power_acpi_cpufreq_freq_up;
+		rte_power_freq_down = rte_power_acpi_cpufreq_freq_down;
+		rte_power_freq_min = rte_power_acpi_cpufreq_freq_min;
+		rte_power_freq_max = rte_power_acpi_cpufreq_freq_max;
+	} else if (env == PM_ENV_KVM_VM) {
+		rte_power_freqs = rte_power_kvm_vm_freqs;
+		rte_power_get_freq = rte_power_kvm_vm_get_freq;
+		rte_power_set_freq = rte_power_kvm_vm_set_freq;
+		rte_power_freq_up = rte_power_kvm_vm_freq_up;
+		rte_power_freq_down = rte_power_kvm_vm_freq_down;
+		rte_power_freq_min = rte_power_kvm_vm_freq_min;
+		rte_power_freq_max = rte_power_kvm_vm_freq_max;
+	} else {
+		RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n",
+				env);
+		rte_power_unset_env();
+		return -1;
+	}
+	global_default_env = env;
+	return 0;
+
+}
+
+void
+rte_power_unset_env(void)
+{
+	if (rte_atomic32_cmpset(&global_env_cfg_status, 1, 0) != 0)
+		global_default_env = PM_ENV_NOT_SET;
+}
+
+enum power_management_env
+rte_power_get_env(void) {
+	return global_default_env;
+}
+
+int
+rte_power_init(unsigned lcore_id)
+{
+	int ret = -1;
+
+	if (global_default_env == PM_ENV_ACPI_CPUFREQ) {
+		return rte_power_acpi_cpufreq_init(lcore_id);
+	}
+	if (global_default_env == PM_ENV_KVM_VM) {
+		return rte_power_kvm_vm_init(lcore_id);
+	}
+	/* Auto detect Environment */
+	RTE_LOG(INFO, POWER, "Attempting to initialise ACPI cpufreq power "
+			"management...\n");
+	ret = rte_power_acpi_cpufreq_init(lcore_id);
+	if (ret == 0) {
+		rte_power_set_env(PM_ENV_ACPI_CPUFREQ);
+		goto out;
+	}
+
+	RTE_LOG(INFO, POWER, "Attempting to initialise VM power management...\n");
+	ret = rte_power_kvm_vm_init(lcore_id);
+	if (ret == 0) {
+		rte_power_set_env(PM_ENV_KVM_VM);
+		goto out;
+	}
+	RTE_LOG(ERR, POWER, "Unable to set Power Management Environment for lcore "
+			"%u\n", lcore_id);
+out:
+	return ret;
+}
+
+int
+rte_power_exit(unsigned lcore_id)
+{
+	if (global_default_env == PM_ENV_ACPI_CPUFREQ)
+		return rte_power_acpi_cpufreq_exit(lcore_id);
+	if (global_default_env == PM_ENV_KVM_VM)
+		return rte_power_kvm_vm_exit(lcore_id);
+
+	RTE_LOG(ERR, POWER, "Environment has not been set, unable to exit "
+				"gracefully\n");
+	return -1;
+
+}
diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
new file mode 100644
index 00000000..67e0ec02
--- /dev/null
+++ b/lib/librte_power/rte_power.h
@@ -0,0 +1,243 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_POWER_H
+#define _RTE_POWER_H
+
+/**
+ * @file
+ * RTE Power Management
+ */
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Power Management Environment State */
+enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, PM_ENV_KVM_VM};
+
+/**
+ * Set the default power management implementation. If this is not called prior
+ * to rte_power_init(), then auto-detect of the environment will take place.
+ * It is not thread safe.
+ *
+ * @param env
+ *  env. The environment in which to initialise Power Management for.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_set_env(enum power_management_env env);
+
+/**
+ * Unset the global environment configuration.
+ * This can only be called after all threads have completed.
+ */
+void rte_power_unset_env(void);
+
+/**
+ * Get the default power management implementation.
+ *
+ * @return
+ *  power_management_env The configured environment.
+ */
+enum power_management_env rte_power_get_env(void);
+
+/**
+ * Initialize power management for a specific lcore. If rte_power_set_env() has
+ * not been called then an auto-detect of the environment will start and
+ * initialise the corresponding resources.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_init(unsigned lcore_id);
+
+/**
+ * Exit power management on a specific lcore. This will call the environment
+ * dependent exit function.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_exit(unsigned lcore_id);
+
+/**
+ * Get the available frequencies of a specific lcore.
+ * Function pointer definition. Review each environments
+ * specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param freqs
+ *  The buffer array to save the frequencies.
+ * @param num
+ *  The number of frequencies to get.
+ *
+ * @return
+ *  The number of available frequencies.
+ */
+typedef uint32_t (*rte_power_freqs_t)(unsigned lcore_id, uint32_t *freqs,
+		uint32_t num);
+
+extern rte_power_freqs_t rte_power_freqs;
+
+/**
+ * Return the current index of available frequencies of a specific lcore.
+ * Function pointer definition. Review each environments
+ * specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  The current index of available frequencies.
+ */
+typedef uint32_t (*rte_power_get_freq_t)(unsigned lcore_id);
+
+extern rte_power_get_freq_t rte_power_get_freq;
+
+/**
+ * Set the new frequency for a specific lcore by indicating the index of
+ * available frequencies.
+ * Function pointer definition. Review each environments
+ * specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param index
+ *  The index of available frequencies.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+typedef int (*rte_power_set_freq_t)(unsigned lcore_id, uint32_t index);
+
+extern rte_power_set_freq_t rte_power_set_freq;
+
+/**
+ * Function pointer definition for generic frequency change functions. Review
+ * each environments specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+typedef int (*rte_power_freq_change_t)(unsigned lcore_id);
+
+/**
+ * Scale up the frequency of a specific lcore according to the available
+ * frequencies.
+ * Review each environments specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+extern rte_power_freq_change_t rte_power_freq_up;
+
+/**
+ * Scale down the frequency of a specific lcore according to the available
+ * frequencies.
+ * Review each environments specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+
+extern rte_power_freq_change_t rte_power_freq_down;
+
+/**
+ * Scale up the frequency of a specific lcore to the highest according to the
+ * available frequencies.
+ * Review each environments specific documentation for usage.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+extern rte_power_freq_change_t rte_power_freq_max;
+
+/**
+ * Scale down the frequency of a specific lcore to the lowest according to the
+ * available frequencies.
+ * Review each environments specific documentation for usage..
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+extern rte_power_freq_change_t rte_power_freq_min;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.c b/lib/librte_power/rte_power_acpi_cpufreq.c
new file mode 100644
index 00000000..a56c9b59
--- /dev/null
+++ b/lib/librte_power/rte_power_acpi_cpufreq.c
@@ -0,0 +1,545 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <limits.h>
+
+#include <rte_memcpy.h>
+#include <rte_atomic.h>
+
+#include "rte_power_acpi_cpufreq.h"
+#include "rte_power_common.h"
+
+#ifdef RTE_LIBRTE_POWER_DEBUG
+#define POWER_DEBUG_TRACE(fmt, args...) do { \
+		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
+} while (0)
+#else
+#define POWER_DEBUG_TRACE(fmt, args...)
+#endif
+
+#define FOPEN_OR_ERR_RET(f, retval) do { \
+		if ((f) == NULL) { \
+			RTE_LOG(ERR, POWER, "File not openned\n"); \
+			return retval; \
+		} \
+} while (0)
+
+#define FOPS_OR_NULL_GOTO(ret, label) do { \
+		if ((ret) == NULL) { \
+			RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
+			goto label; \
+		} \
+} while (0)
+
+#define FOPS_OR_ERR_GOTO(ret, label) do { \
+		if ((ret) < 0) { \
+			RTE_LOG(ERR, POWER, "File operations failed\n"); \
+			goto label; \
+		} \
+} while (0)
+
+#define STR_SIZE     1024
+#define POWER_CONVERT_TO_DECIMAL 10
+
+#define POWER_GOVERNOR_USERSPACE "userspace"
+#define POWER_SYSFILE_GOVERNOR   \
+		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
+#define POWER_SYSFILE_AVAIL_FREQ \
+		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
+#define POWER_SYSFILE_SETSPEED   \
+		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
+
+enum power_state {
+	POWER_IDLE = 0,
+	POWER_ONGOING,
+	POWER_USED,
+	POWER_UNKNOWN
+};
+
+/**
+ * Power info per lcore.
+ */
+struct rte_power_info {
+	unsigned lcore_id;                   /**< Logical core id */
+	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
+	uint32_t nb_freqs;                   /**< number of available freqs */
+	FILE *f;                             /**< FD of scaling_setspeed */
+	char governor_ori[32];               /**< Original governor name */
+	uint32_t curr_idx;                   /**< Freq index in freqs array */
+	volatile uint32_t state;             /**< Power in use state */
+} __rte_cache_aligned;
+
+static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
+
+/**
+ * It is to set specific freq for specific logical core, according to the index
+ * of supported frequencies.
+ */
+static int
+set_freq_internal(struct rte_power_info *pi, uint32_t idx)
+{
+	if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
+		RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
+				"should be less than %u\n", idx, pi->nb_freqs);
+		return -1;
+	}
+
+	/* Check if it is the same as current */
+	if (idx == pi->curr_idx)
+		return 0;
+
+	POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
+			idx, pi->freqs[idx], pi->lcore_id);
+	if (fseek(pi->f, 0, SEEK_SET) < 0) {
+		RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
+				"for setting frequency for lcore %u\n", pi->lcore_id);
+		return -1;
+	}
+	if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
+		RTE_LOG(ERR, POWER, "Fail to write new frequency for "
+				"lcore %u\n", pi->lcore_id);
+		return -1;
+	}
+	fflush(pi->f);
+	pi->curr_idx = idx;
+
+	return 1;
+}
+
+/**
+ * It is to check the current scaling governor by reading sys file, and then
+ * set it into 'userspace' if it is not by writing the sys file. The original
+ * governor will be saved for rolling back.
+ */
+static int
+power_set_governor_userspace(struct rte_power_info *pi)
+{
+	FILE *f;
+	int ret = -1;
+	char buf[BUFSIZ];
+	char fullpath[PATH_MAX];
+	char *s;
+	int val;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
+			pi->lcore_id);
+	f = fopen(fullpath, "rw+");
+	FOPEN_OR_ERR_RET(f, ret);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	/* Check if current governor is userspace */
+	if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
+			sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
+		ret = 0;
+		POWER_DEBUG_TRACE("Power management governor of lcore %u is "
+				"already userspace\n", pi->lcore_id);
+		goto out;
+	}
+	/* Save the original governor */
+	snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
+
+	/* Write 'userspace' to the governor */
+	val = fseek(f, 0, SEEK_SET);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	val = fputs(POWER_GOVERNOR_USERSPACE, f);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	ret = 0;
+	RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
+			"set to user space successfully\n", pi->lcore_id);
+out:
+	fclose(f);
+
+	return ret;
+}
+
+/**
+ * It is to get the available frequencies of the specific lcore by reading the
+ * sys file.
+ */
+static int
+power_get_available_freqs(struct rte_power_info *pi)
+{
+	FILE *f;
+	int ret = -1, i, count;
+	char *p;
+	char buf[BUFSIZ];
+	char fullpath[PATH_MAX];
+	char *freqs[RTE_MAX_LCORE_FREQS];
+	char *s;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
+			pi->lcore_id);
+	f = fopen(fullpath, "r");
+	FOPEN_OR_ERR_RET(f, ret);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	/* Strip the line break if there is */
+	p = strchr(buf, '\n');
+	if (p != NULL)
+		*p = 0;
+
+	/* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
+	count = rte_strsplit(buf, sizeof(buf), freqs,
+			RTE_MAX_LCORE_FREQS, ' ');
+	if (count <= 0) {
+		RTE_LOG(ERR, POWER, "No available frequency in "
+				""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
+		goto out;
+	}
+	if (count >= RTE_MAX_LCORE_FREQS) {
+		RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
+				count);
+		goto out;
+	}
+
+	/* Store the available frequncies into power context */
+	for (i = 0, pi->nb_freqs = 0; i < count; i++) {
+		POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
+				i, freqs[i]);
+		pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
+				POWER_CONVERT_TO_DECIMAL);
+	}
+
+	ret = 0;
+	POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
+			count, pi->lcore_id);
+out:
+	fclose(f);
+
+	return ret;
+}
+
+/**
+ * It is to fopen the sys file for the future setting the lcore frequency.
+ */
+static int
+power_init_for_setting_freq(struct rte_power_info *pi)
+{
+	FILE *f;
+	char fullpath[PATH_MAX];
+	char buf[BUFSIZ];
+	uint32_t i, freq;
+	char *s;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
+			pi->lcore_id);
+	f = fopen(fullpath, "rw+");
+	FOPEN_OR_ERR_RET(f, -1);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
+	for (i = 0; i < pi->nb_freqs; i++) {
+		if (freq == pi->freqs[i]) {
+			pi->curr_idx = i;
+			pi->f = f;
+			return 0;
+		}
+	}
+
+out:
+	fclose(f);
+
+	return -1;
+}
+
+int
+rte_power_acpi_cpufreq_init(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
+				lcore_id, RTE_MAX_LCORE - 1U);
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
+			== 0) {
+		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
+				"in use\n", lcore_id);
+		return -1;
+	}
+
+	pi->lcore_id = lcore_id;
+	/* Check and set the governor */
+	if (power_set_governor_userspace(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
+				"userspace\n", lcore_id);
+		goto fail;
+	}
+
+	/* Get the available frequencies */
+	if (power_get_available_freqs(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
+				"lcore %u\n", lcore_id);
+		goto fail;
+	}
+
+	/* Init for setting lcore frequency */
+	if (power_init_for_setting_freq(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
+				"lcore %u\n", lcore_id);
+		goto fail;
+	}
+
+	/* Set freq to max by default */
+	if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
+				"to max\n", lcore_id);
+		goto fail;
+	}
+
+	RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
+			"power manamgement\n", lcore_id);
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
+
+	return 0;
+
+fail:
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+
+	return -1;
+}
+
+/**
+ * It is to check the governor and then set the original governor back if
+ * needed by writing the the sys file.
+ */
+static int
+power_set_governor_original(struct rte_power_info *pi)
+{
+	FILE *f;
+	int ret = -1;
+	char buf[BUFSIZ];
+	char fullpath[PATH_MAX];
+	char *s;
+	int val;
+
+	snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
+			pi->lcore_id);
+	f = fopen(fullpath, "rw+");
+	FOPEN_OR_ERR_RET(f, ret);
+
+	s = fgets(buf, sizeof(buf), f);
+	FOPS_OR_NULL_GOTO(s, out);
+
+	/* Check if the governor to be set is the same as current */
+	if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
+		ret = 0;
+		POWER_DEBUG_TRACE("Power management governor of lcore %u "
+				"has already been set to %s\n",
+				pi->lcore_id, pi->governor_ori);
+		goto out;
+	}
+
+	/* Write back the original governor */
+	val = fseek(f, 0, SEEK_SET);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	val = fputs(pi->governor_ori, f);
+	FOPS_OR_ERR_GOTO(val, out);
+
+	ret = 0;
+	RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
+			"has been set back to %s successfully\n",
+			pi->lcore_id, pi->governor_ori);
+out:
+	fclose(f);
+
+	return ret;
+}
+
+int
+rte_power_acpi_cpufreq_exit(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
+				lcore_id, RTE_MAX_LCORE - 1U);
+		return -1;
+	}
+	pi = &lcore_power_info[lcore_id];
+	if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
+			== 0) {
+		RTE_LOG(INFO, POWER, "Power management of lcore %u is "
+				"not used\n", lcore_id);
+		return -1;
+	}
+
+	/* Close FD of setting freq */
+	fclose(pi->f);
+	pi->f = NULL;
+
+	/* Set the governor back to the original */
+	if (power_set_governor_original(pi) < 0) {
+		RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
+				"to the original\n", lcore_id);
+		goto fail;
+	}
+
+	RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
+			"'userspace' mode and been set back to the "
+			"original\n", lcore_id);
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
+
+	return 0;
+
+fail:
+	rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+
+	return -1;
+}
+
+uint32_t
+rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE || !freqs) {
+		RTE_LOG(ERR, POWER, "Invalid input parameter\n");
+		return 0;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (num < pi->nb_freqs) {
+		RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
+		return 0;
+	}
+	rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
+
+	return pi->nb_freqs;
+}
+
+uint32_t
+rte_power_acpi_cpufreq_get_freq(unsigned lcore_id)
+{
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return RTE_POWER_INVALID_FREQ_INDEX;
+	}
+
+	return lcore_power_info[lcore_id].curr_idx;
+}
+
+int
+rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index)
+{
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
+}
+
+int
+rte_power_acpi_cpufreq_freq_down(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (pi->curr_idx + 1 == pi->nb_freqs)
+		return 0;
+
+	/* Frequencies in the array are from high to low. */
+	return set_freq_internal(pi, pi->curr_idx + 1);
+}
+
+int
+rte_power_acpi_cpufreq_freq_up(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+	if (pi->curr_idx == 0)
+		return 0;
+
+	/* Frequencies in the array are from high to low. */
+	return set_freq_internal(pi, pi->curr_idx - 1);
+}
+
+int
+rte_power_acpi_cpufreq_freq_max(unsigned lcore_id)
+{
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	/* Frequencies in the array are from high to low. */
+	return set_freq_internal(&lcore_power_info[lcore_id], 0);
+}
+
+int
+rte_power_acpi_cpufreq_freq_min(unsigned lcore_id)
+{
+	struct rte_power_info *pi;
+
+	if (lcore_id >= RTE_MAX_LCORE) {
+		RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+		return -1;
+	}
+
+	pi = &lcore_power_info[lcore_id];
+
+	/* Frequencies in the array are from high to low. */
+	return set_freq_internal(pi, pi->nb_freqs - 1);
+}
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.h b/lib/librte_power/rte_power_acpi_cpufreq.h
new file mode 100644
index 00000000..68578e9b
--- /dev/null
+++ b/lib/librte_power/rte_power_acpi_cpufreq.h
@@ -0,0 +1,192 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_POWER_ACPI_CPUFREQ_H
+#define _RTE_POWER_ACPI_CPUFREQ_H
+
+/**
+ * @file
+ * RTE Power Management via userspace ACPI cpufreq
+ */
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize power management for a specific lcore. It will check and set the
+ * governor to userspace for the lcore, get the available frequencies, and
+ * prepare to set new lcore frequency.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_init(unsigned lcore_id);
+
+/**
+ * Exit power management on a specific lcore. It will set the governor to which
+ * is before initialized.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_exit(unsigned lcore_id);
+
+/**
+ * Get the available frequencies of a specific lcore. The return value will be
+ * the minimal one of the total number of available frequencies and the number
+ * of buffer. The index of available frequencies used in other interfaces
+ * should be in the range of 0 to this return value.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param freqs
+ *  The buffer array to save the frequencies.
+ * @param num
+ *  The number of frequencies to get.
+ *
+ * @return
+ *  The number of available frequencies.
+ */
+uint32_t rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs,
+		uint32_t num);
+
+/**
+ * Return the current index of available frequencies of a specific lcore. It
+ * will return 'RTE_POWER_INVALID_FREQ_INDEX = (~0)' if error.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  The current index of available frequencies.
+ */
+uint32_t rte_power_acpi_cpufreq_get_freq(unsigned lcore_id);
+
+/**
+ * Set the new frequency for a specific lcore by indicating the index of
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param index
+ *  The index of available frequencies.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index);
+
+/**
+ * Scale up the frequency of a specific lcore according to the available
+ * frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_up(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore according to the available
+ * frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_down(unsigned lcore_id);
+
+/**
+ * Scale up the frequency of a specific lcore to the highest according to the
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency changed.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_max(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore to the lowest according to the
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success with frequency changed.
+ *  - 0 on success without frequency chnaged.
+ *  - Negative on error.
+ */
+int rte_power_acpi_cpufreq_freq_min(unsigned lcore_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_power/rte_power_common.h b/lib/librte_power/rte_power_common.h
new file mode 100644
index 00000000..64bd168f
--- /dev/null
+++ b/lib/librte_power/rte_power_common.h
@@ -0,0 +1,39 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_POWER_COMMON_H_
+#define RTE_POWER_COMMON_H_
+
+#define RTE_POWER_INVALID_FREQ_INDEX (~0)
+
+#endif /* RTE_POWER_COMMON_H_ */
diff --git a/lib/librte_power/rte_power_kvm_vm.c b/lib/librte_power/rte_power_kvm_vm.c
new file mode 100644
index 00000000..7bb2774c
--- /dev/null
+++ b/lib/librte_power/rte_power_kvm_vm.c
@@ -0,0 +1,135 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <errno.h>
+#include <string.h>
+
+#include <rte_log.h>
+
+#include "guest_channel.h"
+#include "channel_commands.h"
+#include "rte_power_kvm_vm.h"
+#include "rte_power_common.h"
+
+#define FD_PATH "/dev/virtio-ports/virtio.serial.port.poweragent"
+
+static struct channel_packet pkt[CHANNEL_CMDS_MAX_VM_CHANNELS];
+
+
+int
+rte_power_kvm_vm_init(unsigned lcore_id)
+{
+	if (lcore_id >= CHANNEL_CMDS_MAX_VM_CHANNELS) {
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, CHANNEL_CMDS_MAX_VM_CHANNELS-1);
+		return -1;
+	}
+	pkt[lcore_id].command = CPU_POWER;
+	pkt[lcore_id].resource_id = lcore_id;
+	return guest_channel_host_connect(FD_PATH, lcore_id);
+}
+
+int
+rte_power_kvm_vm_exit(unsigned lcore_id)
+{
+	guest_channel_host_disconnect(lcore_id);
+	return 0;
+}
+
+uint32_t
+rte_power_kvm_vm_freqs(__attribute__((unused)) unsigned lcore_id,
+		__attribute__((unused)) uint32_t *freqs,
+		__attribute__((unused)) uint32_t num)
+{
+	RTE_LOG(ERR, POWER, "rte_power_freqs is not implemented "
+			"for Virtual Machine Power Management\n");
+	return -ENOTSUP;
+}
+
+uint32_t
+rte_power_kvm_vm_get_freq(__attribute__((unused)) unsigned lcore_id)
+{
+	RTE_LOG(ERR, POWER, "rte_power_get_freq is not implemented "
+			"for Virtual Machine Power Management\n");
+	return -ENOTSUP;
+}
+
+int
+rte_power_kvm_vm_set_freq(__attribute__((unused)) unsigned lcore_id,
+		__attribute__((unused)) uint32_t index)
+{
+	RTE_LOG(ERR, POWER, "rte_power_set_freq is not implemented "
+			"for Virtual Machine Power Management\n");
+	return -ENOTSUP;
+}
+
+static inline int
+send_msg(unsigned lcore_id, uint32_t scale_direction)
+{
+	int ret;
+
+	if (lcore_id >= CHANNEL_CMDS_MAX_VM_CHANNELS) {
+		RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n",
+				lcore_id, CHANNEL_CMDS_MAX_VM_CHANNELS-1);
+		return -1;
+	}
+	pkt[lcore_id].unit = scale_direction;
+	ret = guest_channel_send_msg(&pkt[lcore_id], lcore_id);
+	if (ret == 0)
+		return 1;
+	RTE_LOG(DEBUG, POWER, "Error sending message: %s\n", strerror(ret));
+	return -1;
+}
+
+int
+rte_power_kvm_vm_freq_up(unsigned lcore_id)
+{
+	return send_msg(lcore_id, CPU_POWER_SCALE_UP);
+}
+
+int
+rte_power_kvm_vm_freq_down(unsigned lcore_id)
+{
+	return send_msg(lcore_id, CPU_POWER_SCALE_DOWN);
+}
+
+int
+rte_power_kvm_vm_freq_max(unsigned lcore_id)
+{
+	return send_msg(lcore_id, CPU_POWER_SCALE_MAX);
+}
+
+int
+rte_power_kvm_vm_freq_min(unsigned lcore_id)
+{
+	return send_msg(lcore_id, CPU_POWER_SCALE_MIN);
+}
diff --git a/lib/librte_power/rte_power_kvm_vm.h b/lib/librte_power/rte_power_kvm_vm.h
new file mode 100644
index 00000000..dcbc878a
--- /dev/null
+++ b/lib/librte_power/rte_power_kvm_vm.h
@@ -0,0 +1,179 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_POWER_KVM_VM_H
+#define _RTE_POWER_KVM_VM_H
+
+/**
+ * @file
+ * RTE Power Management KVM VM
+ */
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Initialize power management for a specific lcore.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_init(unsigned lcore_id);
+
+/**
+ * Exit power management on a specific lcore.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_exit(unsigned lcore_id);
+
+/**
+ * Get the available frequencies of a specific lcore.
+ * It is not currently supported for VM Power Management.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param freqs
+ *  The buffer array to save the frequencies.
+ * @param num
+ *  The number of frequencies to get.
+ *
+ * @return
+ *  -ENOTSUP
+ */
+uint32_t rte_power_kvm_vm_freqs(unsigned lcore_id, uint32_t *freqs,
+		uint32_t num);
+
+/**
+ * Return the current index of available frequencies of a specific lcore.
+ * It is not currently supported for VM Power Management.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  -ENOTSUP
+ */
+uint32_t rte_power_kvm_vm_get_freq(unsigned lcore_id);
+
+/**
+ * Set the new frequency for a specific lcore by indicating the index of
+ * available frequencies.
+ * It is not currently supported for VM Power Management.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param index
+ *  The index of available frequencies.
+ *
+ * @return
+ *  -ENOTSUP
+ */
+int rte_power_kvm_vm_set_freq(unsigned lcore_id, uint32_t index);
+
+/**
+ * Scale up the frequency of a specific lcore. This request is forwarded to the
+ * host monitor.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_up(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore according to the available
+ * frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_down(unsigned lcore_id);
+
+/**
+ * Scale up the frequency of a specific lcore to the highest according to the
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_max(unsigned lcore_id);
+
+/**
+ * Scale down the frequency of a specific lcore to the lowest according to the
+ * available frequencies.
+ * It should be protected outside of this function for threadsafe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ *
+ * @return
+ *  - 1 on success.
+ *  - Negative on error.
+ */
+int rte_power_kvm_vm_freq_min(unsigned lcore_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map
new file mode 100644
index 00000000..db75ff3e
--- /dev/null
+++ b/lib/librte_power/rte_power_version.map
@@ -0,0 +1,18 @@
+DPDK_2.0 {
+	global:
+
+	rte_power_exit;
+	rte_power_freq_down;
+	rte_power_freq_max;
+	rte_power_freq_min;
+	rte_power_freq_up;
+	rte_power_freqs;
+	rte_power_get_env;
+	rte_power_get_freq;
+	rte_power_init;
+	rte_power_set_env;
+	rte_power_set_freq;
+	rte_power_unset_env;
+
+	local: *;
+};
diff --git a/lib/librte_reorder/Makefile b/lib/librte_reorder/Makefile
new file mode 100644
index 00000000..0c01de16
--- /dev/null
+++ b/lib/librte_reorder/Makefile
@@ -0,0 +1,54 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_reorder.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_reorder_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_REORDER) := rte_reorder.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_REORDER)-include := rte_reorder.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_reorder/rte_reorder.c b/lib/librte_reorder/rte_reorder.c
new file mode 100644
index 00000000..010dff68
--- /dev/null
+++ b/lib/librte_reorder/rte_reorder.c
@@ -0,0 +1,411 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_memzone.h>
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+
+#include "rte_reorder.h"
+
+TAILQ_HEAD(rte_reorder_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_reorder_tailq = {
+	.name = "RTE_REORDER",
+};
+EAL_REGISTER_TAILQ(rte_reorder_tailq)
+
+#define NO_FLAGS 0
+#define RTE_REORDER_PREFIX "RO_"
+#define RTE_REORDER_NAMESIZE 32
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_REORDER	RTE_LOGTYPE_USER1
+
+/* A generic circular buffer */
+struct cir_buffer {
+	unsigned int size;   /**< Number of entries that can be stored */
+	unsigned int mask;   /**< [buffer_size - 1]: used for wrap-around */
+	unsigned int head;   /**< insertion point in buffer */
+	unsigned int tail;   /**< extraction point in buffer */
+	struct rte_mbuf **entries;
+} __rte_cache_aligned;
+
+/* The reorder buffer data structure itself */
+struct rte_reorder_buffer {
+	char name[RTE_REORDER_NAMESIZE];
+	uint32_t min_seqn;  /**< Lowest seq. number that can be in the buffer */
+	unsigned int memsize; /**< memory area size of reorder buffer */
+	struct cir_buffer ready_buf; /**< temp buffer for dequeued entries */
+	struct cir_buffer order_buf; /**< buffer used to reorder entries */
+	int is_initialized;
+} __rte_cache_aligned;
+
+static void
+rte_reorder_free_mbufs(struct rte_reorder_buffer *b);
+
+struct rte_reorder_buffer *
+rte_reorder_init(struct rte_reorder_buffer *b, unsigned int bufsize,
+		const char *name, unsigned int size)
+{
+	const unsigned int min_bufsize = sizeof(*b) +
+					(2 * size * sizeof(struct rte_mbuf *));
+
+	if (b == NULL) {
+		RTE_LOG(ERR, REORDER, "Invalid reorder buffer parameter:"
+					" NULL\n");
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	if (!rte_is_power_of_2(size)) {
+		RTE_LOG(ERR, REORDER, "Invalid reorder buffer size"
+				" - Not a power of 2\n");
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	if (name == NULL) {
+		RTE_LOG(ERR, REORDER, "Invalid reorder buffer name ptr:"
+					" NULL\n");
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	if (bufsize < min_bufsize) {
+		RTE_LOG(ERR, REORDER, "Invalid reorder buffer memory size: %u, "
+			"minimum required: %u\n", bufsize, min_bufsize);
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	memset(b, 0, bufsize);
+	snprintf(b->name, sizeof(b->name), "%s", name);
+	b->memsize = bufsize;
+	b->order_buf.size = b->ready_buf.size = size;
+	b->order_buf.mask = b->ready_buf.mask = size - 1;
+	b->ready_buf.entries = (void *)&b[1];
+	b->order_buf.entries = RTE_PTR_ADD(&b[1],
+			size * sizeof(b->ready_buf.entries[0]));
+
+	return b;
+}
+
+struct rte_reorder_buffer*
+rte_reorder_create(const char *name, unsigned socket_id, unsigned int size)
+{
+	struct rte_reorder_buffer *b = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_reorder_list *reorder_list;
+	const unsigned int bufsize = sizeof(struct rte_reorder_buffer) +
+					(2 * size * sizeof(struct rte_mbuf *));
+
+	reorder_list = RTE_TAILQ_CAST(rte_reorder_tailq.head, rte_reorder_list);
+
+	/* Check user arguments. */
+	if (!rte_is_power_of_2(size)) {
+		RTE_LOG(ERR, REORDER, "Invalid reorder buffer size"
+				" - Not a power of 2\n");
+		rte_errno = EINVAL;
+		return NULL;
+	}
+	if (name == NULL) {
+		RTE_LOG(ERR, REORDER, "Invalid reorder buffer name ptr:"
+					" NULL\n");
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* guarantee there's no existing */
+	TAILQ_FOREACH(te, reorder_list, next) {
+		b = (struct rte_reorder_buffer *) te->data;
+		if (strncmp(name, b->name, RTE_REORDER_NAMESIZE) == 0)
+			break;
+	}
+	if (te != NULL)
+		goto exit;
+
+	/* allocate tailq entry */
+	te = rte_zmalloc("REORDER_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, REORDER, "Failed to allocate tailq entry\n");
+		rte_errno = ENOMEM;
+		b = NULL;
+		goto exit;
+	}
+
+	/* Allocate memory to store the reorder buffer structure. */
+	b = rte_zmalloc_socket("REORDER_BUFFER", bufsize, 0, socket_id);
+	if (b == NULL) {
+		RTE_LOG(ERR, REORDER, "Memzone allocation failed\n");
+		rte_errno = ENOMEM;
+		rte_free(te);
+	} else {
+		rte_reorder_init(b, bufsize, name, size);
+		te->data = (void *)b;
+		TAILQ_INSERT_TAIL(reorder_list, te, next);
+	}
+
+exit:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+	return b;
+}
+
+void
+rte_reorder_reset(struct rte_reorder_buffer *b)
+{
+	char name[RTE_REORDER_NAMESIZE];
+
+	rte_reorder_free_mbufs(b);
+	snprintf(name, sizeof(name), "%s", b->name);
+	/* No error checking as current values should be valid */
+	rte_reorder_init(b, b->memsize, name, b->order_buf.size);
+}
+
+static void
+rte_reorder_free_mbufs(struct rte_reorder_buffer *b)
+{
+	unsigned i;
+
+	/* Free up the mbufs of order buffer & ready buffer */
+	for (i = 0; i < b->order_buf.size; i++) {
+		if (b->order_buf.entries[i])
+			rte_pktmbuf_free(b->order_buf.entries[i]);
+		if (b->ready_buf.entries[i])
+			rte_pktmbuf_free(b->ready_buf.entries[i]);
+	}
+}
+
+void
+rte_reorder_free(struct rte_reorder_buffer *b)
+{
+	struct rte_reorder_list *reorder_list;
+	struct rte_tailq_entry *te;
+
+	/* Check user arguments. */
+	if (b == NULL)
+		return;
+
+	reorder_list = RTE_TAILQ_CAST(rte_reorder_tailq.head, rte_reorder_list);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find our tailq entry */
+	TAILQ_FOREACH(te, reorder_list, next) {
+		if (te->data == (void *) b)
+			break;
+	}
+	if (te == NULL) {
+		rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+		return;
+	}
+
+	TAILQ_REMOVE(reorder_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_reorder_free_mbufs(b);
+
+	rte_free(b);
+	rte_free(te);
+}
+
+struct rte_reorder_buffer *
+rte_reorder_find_existing(const char *name)
+{
+	struct rte_reorder_buffer *b = NULL;
+	struct rte_tailq_entry *te;
+	struct rte_reorder_list *reorder_list;
+
+	reorder_list = RTE_TAILQ_CAST(rte_reorder_tailq.head, rte_reorder_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+	TAILQ_FOREACH(te, reorder_list, next) {
+		b = (struct rte_reorder_buffer *) te->data;
+		if (strncmp(name, b->name, RTE_REORDER_NAMESIZE) == 0)
+			break;
+	}
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return b;
+}
+
+static unsigned
+rte_reorder_fill_overflow(struct rte_reorder_buffer *b, unsigned n)
+{
+	/*
+	 * 1. Move all ready entries that fit to the ready_buf
+	 * 2. check if we meet the minimum needed (n).
+	 * 3. If not, then skip any gaps and keep moving.
+	 * 4. If at any point the ready buffer is full, stop
+	 * 5. Return the number of positions the order_buf head has moved
+	 */
+
+	struct cir_buffer *order_buf = &b->order_buf,
+			*ready_buf = &b->ready_buf;
+
+	unsigned int order_head_adv = 0;
+
+	/*
+	 * move at least n packets to ready buffer, assuming ready buffer
+	 * has room for those packets.
+	 */
+	while (order_head_adv < n &&
+			((ready_buf->head + 1) & ready_buf->mask) != ready_buf->tail) {
+
+		/* if we are blocked waiting on a packet, skip it */
+		if (order_buf->entries[order_buf->head] == NULL) {
+			order_buf->head = (order_buf->head + 1) & order_buf->mask;
+			order_head_adv++;
+		}
+
+		/* Move all ready entries that fit to the ready_buf */
+		while (order_buf->entries[order_buf->head] != NULL) {
+			ready_buf->entries[ready_buf->head] =
+					order_buf->entries[order_buf->head];
+
+			order_buf->entries[order_buf->head] = NULL;
+			order_head_adv++;
+
+			order_buf->head = (order_buf->head + 1) & order_buf->mask;
+
+			if (((ready_buf->head + 1) & ready_buf->mask) == ready_buf->tail)
+				break;
+
+			ready_buf->head = (ready_buf->head + 1) & ready_buf->mask;
+		}
+	}
+
+	b->min_seqn += order_head_adv;
+	/* Return the number of positions the order_buf head has moved */
+	return order_head_adv;
+}
+
+int
+rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf)
+{
+	uint32_t offset, position;
+	struct cir_buffer *order_buf = &b->order_buf;
+
+	if (!b->is_initialized) {
+		b->min_seqn = mbuf->seqn;
+		b->is_initialized = 1;
+	}
+
+	/*
+	 * calculate the offset from the head pointer we need to go.
+	 * The subtraction takes care of the sequence number wrapping.
+	 * For example (using 16-bit for brevity):
+	 *	min_seqn  = 0xFFFD
+	 *	mbuf_seqn = 0x0010
+	 *	offset    = 0x0010 - 0xFFFD = 0x13
+	 */
+	offset = mbuf->seqn - b->min_seqn;
+
+	/*
+	 * action to take depends on offset.
+	 * offset < buffer->size: the mbuf fits within the current window of
+	 *    sequence numbers we can reorder. EXPECTED CASE.
+	 * offset > buffer->size: the mbuf is outside the current window. There
+	 *    are a number of cases to consider:
+	 *    1. The packet sequence is just outside the window, then we need
+	 *       to see about shifting the head pointer and taking any ready
+	 *       to return packets out of the ring. If there was a delayed
+	 *       or dropped packet preventing drains from shifting the window
+	 *       this case will skip over the dropped packet instead, and any
+	 *       packets dequeued here will be returned on the next drain call.
+	 *    2. The packet sequence number is vastly outside our window, taken
+	 *       here as having offset greater than twice the buffer size. In
+	 *       this case, the packet is probably an old or late packet that
+	 *       was previously skipped, so just enqueue the packet for
+	 *       immediate return on the next drain call, or else return error.
+	 */
+	if (offset < b->order_buf.size) {
+		position = (order_buf->head + offset) & order_buf->mask;
+		order_buf->entries[position] = mbuf;
+	} else if (offset < 2 * b->order_buf.size) {
+		if (rte_reorder_fill_overflow(b, offset + 1 - order_buf->size)
+				< (offset + 1 - order_buf->size)) {
+			/* Put in handling for enqueue straight to output */
+			rte_errno = ENOSPC;
+			return -1;
+		}
+		offset = mbuf->seqn - b->min_seqn;
+		position = (order_buf->head + offset) & order_buf->mask;
+		order_buf->entries[position] = mbuf;
+	} else {
+		/* Put in handling for enqueue straight to output */
+		rte_errno = ERANGE;
+		return -1;
+	}
+	return 0;
+}
+
+unsigned int
+rte_reorder_drain(struct rte_reorder_buffer *b, struct rte_mbuf **mbufs,
+		unsigned max_mbufs)
+{
+	unsigned int drain_cnt = 0;
+
+	struct cir_buffer *order_buf = &b->order_buf,
+			*ready_buf = &b->ready_buf;
+
+	/* Try to fetch requested number of mbufs from ready buffer */
+	while ((drain_cnt < max_mbufs) && (ready_buf->tail != ready_buf->head)) {
+		mbufs[drain_cnt++] = ready_buf->entries[ready_buf->tail];
+		ready_buf->tail = (ready_buf->tail + 1) & ready_buf->mask;
+	}
+
+	/*
+	 * If requested number of buffers not fetched from ready buffer, fetch
+	 * remaining buffers from order buffer
+	 */
+	while ((drain_cnt < max_mbufs) &&
+			(order_buf->entries[order_buf->head] != NULL)) {
+		mbufs[drain_cnt++] = order_buf->entries[order_buf->head];
+		order_buf->entries[order_buf->head] = NULL;
+		b->min_seqn++;
+		order_buf->head = (order_buf->head + 1) & order_buf->mask;
+	}
+
+	return drain_cnt;
+}
diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h
new file mode 100644
index 00000000..c7a2934c
--- /dev/null
+++ b/lib/librte_reorder/rte_reorder.h
@@ -0,0 +1,180 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_REORDER_H_
+#define _RTE_REORDER_H_
+
+/**
+ * @file
+ * RTE reorder
+ *
+ * Reorder library is a component which is designed to
+ * provide ordering of out of ordered packets based on
+ * sequence number present in mbuf.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_reorder_buffer;
+
+/**
+ * Create a new reorder buffer instance
+ *
+ * Allocate memory and initialize a new reorder buffer in that
+ * memory, returning the reorder buffer pointer to the user
+ *
+ * @param name
+ *   The name to be given to the reorder buffer instance.
+ * @param socket_id
+ *   The NUMA node on which the memory for the reorder buffer
+ *   instance is to be reserved.
+ * @param size
+ *   Max number of elements that can be stored in the reorder buffer
+ * @return
+ *   The initialized reorder buffer instance, or NULL on error
+ *   On error case, rte_errno will be set appropriately:
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *    - EINVAL - invalid parameters
+ */
+struct rte_reorder_buffer *
+rte_reorder_create(const char *name, unsigned socket_id, unsigned int size);
+
+/**
+ * Initializes given reorder buffer instance
+ *
+ * @param b
+ *   Reorder buffer instance to initialize
+ * @param bufsize
+ *   Size of the reorder buffer
+ * @param name
+ *   The name to be given to the reorder buffer
+ * @param size
+ *   Number of elements that can be stored in reorder buffer
+ * @return
+ *   The initialized reorder buffer instance, or NULL on error
+ *   On error case, rte_errno will be set appropriately:
+ *    - EINVAL - invalid parameters
+ */
+struct rte_reorder_buffer *
+rte_reorder_init(struct rte_reorder_buffer *b, unsigned int bufsize,
+		const char *name, unsigned int size);
+
+/**
+ * Find an existing reorder buffer instance
+ * and return a pointer to it.
+ *
+ * @param name
+ *   Name of the reorder buffer instacne as passed to rte_reorder_create()
+ * @return
+ *   Pointer to reorder buffer instance or NULL if object not found with rte_errno
+ *   set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ *    reorder instance list
+ */
+struct rte_reorder_buffer *
+rte_reorder_find_existing(const char *name);
+
+/**
+ * Reset the given reorder buffer instance with initial values.
+ *
+ * @param b
+ *   Reorder buffer instance which has to be reset
+ */
+void
+rte_reorder_reset(struct rte_reorder_buffer *b);
+
+/**
+ * Free reorder buffer instance.
+ *
+ * @param b
+ *   reorder buffer instance
+ * @return
+ *   None
+ */
+void
+rte_reorder_free(struct rte_reorder_buffer *b);
+
+/**
+ * Insert given mbuf in reorder buffer in its correct position
+ *
+ * The given mbuf is to be reordered relative to other mbufs in the system.
+ * The mbuf must contain a sequence number which is then used to place
+ * the buffer in the correct position in the reorder buffer. Reordered
+ * packets can later be taken from the buffer using the rte_reorder_drain()
+ * API.
+ *
+ * @param b
+ *   Reorder buffer where the mbuf has to be inserted.
+ * @param mbuf
+ *   mbuf of packet that needs to be inserted in reorder buffer.
+ * @return
+ *   0 on success
+ *   -1 on error
+ *   On error case, rte_errno will be set appropriately:
+ *    - ENOSPC - Cannot move existing mbufs from reorder buffer to accommodate
+ *      ealry mbuf, but it can be accomodated by performing drain and then insert.
+ *    - ERANGE - Too early or late mbuf which is vastly out of range of expected
+ *      window should be ingnored without any handling.
+ */
+int
+rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf);
+
+/**
+ * Fetch reordered buffers
+ *
+ * Returns a set of in-order buffers from the reorder buffer structure. Gaps
+ * may be present in the sequence numbers of the mbuf if packets have been
+ * delayed too long before reaching the reorder window, or have been previously
+ * dropped by the system.
+ *
+ * @param b
+ *   Reorder buffer instance from which packets are to be drained
+ * @param mbufs
+ *   array of mbufs where reordered packets will be inserted from reorder buffer
+ * @param max_mbufs
+ *   the number of elements in the mbufs array.
+ * @return
+ *   number of mbuf pointers written to mbufs. 0 <= N < max_mbufs.
+ */
+unsigned int
+rte_reorder_drain(struct rte_reorder_buffer *b, struct rte_mbuf **mbufs,
+		unsigned max_mbufs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_REORDER_H_ */
diff --git a/lib/librte_reorder/rte_reorder_version.map b/lib/librte_reorder/rte_reorder_version.map
new file mode 100644
index 00000000..0a8a54de
--- /dev/null
+++ b/lib/librte_reorder/rte_reorder_version.map
@@ -0,0 +1,13 @@
+DPDK_2.0 {
+	global:
+
+	rte_reorder_create;
+	rte_reorder_init;
+	rte_reorder_find_existing;
+	rte_reorder_reset;
+	rte_reorder_free;
+	rte_reorder_insert;
+	rte_reorder_drain;
+
+	local: *;
+};
diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile
new file mode 100644
index 00000000..4b1112e4
--- /dev/null
+++ b/lib/librte_ring/Makefile
@@ -0,0 +1,51 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ring.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_ring_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_ring.h
+
+DEPDIRS-$(CONFIG_RTE_LIBRTE_RING) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
new file mode 100644
index 00000000..d80faf3b
--- /dev/null
+++ b/lib/librte_ring/rte_ring.c
@@ -0,0 +1,373 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Derived from FreeBSD's bufring.c
+ *
+ **************************************************************************
+ *
+ * Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. The name of Kip Macy nor the names of other
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ ***************************************************************************/
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_atomic.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+
+#include "rte_ring.h"
+
+TAILQ_HEAD(rte_ring_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_ring_tailq = {
+	.name = RTE_TAILQ_RING_NAME,
+};
+EAL_REGISTER_TAILQ(rte_ring_tailq)
+
+/* true if x is a power of 2 */
+#define POWEROF2(x) ((((x)-1) & (x)) == 0)
+
+/* return the size of memory occupied by a ring */
+ssize_t
+rte_ring_get_memsize(unsigned count)
+{
+	ssize_t sz;
+
+	/* count must be a power of 2 */
+	if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK )) {
+		RTE_LOG(ERR, RING,
+			"Requested size is invalid, must be power of 2, and "
+			"do not exceed the size limit %u\n", RTE_RING_SZ_MASK);
+		return -EINVAL;
+	}
+
+	sz = sizeof(struct rte_ring) + count * sizeof(void *);
+	sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE);
+	return sz;
+}
+
+int
+rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
+	unsigned flags)
+{
+	/* compilation-time checks */
+	RTE_BUILD_BUG_ON((sizeof(struct rte_ring) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#ifdef RTE_RING_SPLIT_PROD_CONS
+	RTE_BUILD_BUG_ON((offsetof(struct rte_ring, cons) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#endif
+	RTE_BUILD_BUG_ON((offsetof(struct rte_ring, prod) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#ifdef RTE_LIBRTE_RING_DEBUG
+	RTE_BUILD_BUG_ON((sizeof(struct rte_ring_debug_stats) &
+			  RTE_CACHE_LINE_MASK) != 0);
+	RTE_BUILD_BUG_ON((offsetof(struct rte_ring, stats) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#endif
+
+	/* init the ring structure */
+	memset(r, 0, sizeof(*r));
+	snprintf(r->name, sizeof(r->name), "%s", name);
+	r->flags = flags;
+	r->prod.watermark = count;
+	r->prod.sp_enqueue = !!(flags & RING_F_SP_ENQ);
+	r->cons.sc_dequeue = !!(flags & RING_F_SC_DEQ);
+	r->prod.size = r->cons.size = count;
+	r->prod.mask = r->cons.mask = count-1;
+	r->prod.head = r->cons.head = 0;
+	r->prod.tail = r->cons.tail = 0;
+
+	return 0;
+}
+
+/* create the ring */
+struct rte_ring *
+rte_ring_create(const char *name, unsigned count, int socket_id,
+		unsigned flags)
+{
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	struct rte_ring *r;
+	struct rte_tailq_entry *te;
+	const struct rte_memzone *mz;
+	ssize_t ring_size;
+	int mz_flags = 0;
+	struct rte_ring_list* ring_list = NULL;
+
+	ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+
+	ring_size = rte_ring_get_memsize(count);
+	if (ring_size < 0) {
+		rte_errno = ring_size;
+		return NULL;
+	}
+
+	te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	snprintf(mz_name, sizeof(mz_name), "%s%s", RTE_RING_MZ_PREFIX, name);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* reserve a memory zone for this ring. If we can't get rte_config or
+	 * we are secondary process, the memzone_reserve function will set
+	 * rte_errno for us appropriately - hence no check in this this function */
+	mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
+	if (mz != NULL) {
+		r = mz->addr;
+		/* no need to check return value here, we already checked the
+		 * arguments above */
+		rte_ring_init(r, name, count, flags);
+
+		te->data = (void *) r;
+		r->memzone = mz;
+
+		TAILQ_INSERT_TAIL(ring_list, te, next);
+	} else {
+		r = NULL;
+		RTE_LOG(ERR, RING, "Cannot reserve memory\n");
+		rte_free(te);
+	}
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return r;
+}
+
+/* free the ring */
+void
+rte_ring_free(struct rte_ring *r)
+{
+	struct rte_ring_list *ring_list = NULL;
+	struct rte_tailq_entry *te;
+
+	if (r == NULL)
+		return;
+
+	/*
+	 * Ring was not created with rte_ring_create,
+	 * therefore, there is no memzone to free.
+	 */
+	if (r->memzone == NULL) {
+		RTE_LOG(ERR, RING, "Cannot free ring (not created with rte_ring_create()");
+		return;
+	}
+
+	if (rte_memzone_free(r->memzone) != 0) {
+		RTE_LOG(ERR, RING, "Cannot free memory\n");
+		return;
+	}
+
+	ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find out tailq entry */
+	TAILQ_FOREACH(te, ring_list, next) {
+		if (te->data == (void *) r)
+			break;
+	}
+
+	if (te == NULL) {
+		rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+		return;
+	}
+
+	TAILQ_REMOVE(ring_list, te, next);
+
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_free(te);
+}
+
+/*
+ * change the high water mark. If *count* is 0, water marking is
+ * disabled
+ */
+int
+rte_ring_set_water_mark(struct rte_ring *r, unsigned count)
+{
+	if (count >= r->prod.size)
+		return -EINVAL;
+
+	/* if count is 0, disable the watermarking */
+	if (count == 0)
+		count = r->prod.size;
+
+	r->prod.watermark = count;
+	return 0;
+}
+
+/* dump the status of the ring on the console */
+void
+rte_ring_dump(FILE *f, const struct rte_ring *r)
+{
+#ifdef RTE_LIBRTE_RING_DEBUG
+	struct rte_ring_debug_stats sum;
+	unsigned lcore_id;
+#endif
+
+	fprintf(f, "ring <%s>@%p\n", r->name, r);
+	fprintf(f, "  flags=%x\n", r->flags);
+	fprintf(f, "  size=%"PRIu32"\n", r->prod.size);
+	fprintf(f, "  ct=%"PRIu32"\n", r->cons.tail);
+	fprintf(f, "  ch=%"PRIu32"\n", r->cons.head);
+	fprintf(f, "  pt=%"PRIu32"\n", r->prod.tail);
+	fprintf(f, "  ph=%"PRIu32"\n", r->prod.head);
+	fprintf(f, "  used=%u\n", rte_ring_count(r));
+	fprintf(f, "  avail=%u\n", rte_ring_free_count(r));
+	if (r->prod.watermark == r->prod.size)
+		fprintf(f, "  watermark=0\n");
+	else
+		fprintf(f, "  watermark=%"PRIu32"\n", r->prod.watermark);
+
+	/* sum and dump statistics */
+#ifdef RTE_LIBRTE_RING_DEBUG
+	memset(&sum, 0, sizeof(sum));
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		sum.enq_success_bulk += r->stats[lcore_id].enq_success_bulk;
+		sum.enq_success_objs += r->stats[lcore_id].enq_success_objs;
+		sum.enq_quota_bulk += r->stats[lcore_id].enq_quota_bulk;
+		sum.enq_quota_objs += r->stats[lcore_id].enq_quota_objs;
+		sum.enq_fail_bulk += r->stats[lcore_id].enq_fail_bulk;
+		sum.enq_fail_objs += r->stats[lcore_id].enq_fail_objs;
+		sum.deq_success_bulk += r->stats[lcore_id].deq_success_bulk;
+		sum.deq_success_objs += r->stats[lcore_id].deq_success_objs;
+		sum.deq_fail_bulk += r->stats[lcore_id].deq_fail_bulk;
+		sum.deq_fail_objs += r->stats[lcore_id].deq_fail_objs;
+	}
+	fprintf(f, "  size=%"PRIu32"\n", r->prod.size);
+	fprintf(f, "  enq_success_bulk=%"PRIu64"\n", sum.enq_success_bulk);
+	fprintf(f, "  enq_success_objs=%"PRIu64"\n", sum.enq_success_objs);
+	fprintf(f, "  enq_quota_bulk=%"PRIu64"\n", sum.enq_quota_bulk);
+	fprintf(f, "  enq_quota_objs=%"PRIu64"\n", sum.enq_quota_objs);
+	fprintf(f, "  enq_fail_bulk=%"PRIu64"\n", sum.enq_fail_bulk);
+	fprintf(f, "  enq_fail_objs=%"PRIu64"\n", sum.enq_fail_objs);
+	fprintf(f, "  deq_success_bulk=%"PRIu64"\n", sum.deq_success_bulk);
+	fprintf(f, "  deq_success_objs=%"PRIu64"\n", sum.deq_success_objs);
+	fprintf(f, "  deq_fail_bulk=%"PRIu64"\n", sum.deq_fail_bulk);
+	fprintf(f, "  deq_fail_objs=%"PRIu64"\n", sum.deq_fail_objs);
+#else
+	fprintf(f, "  no statistics available\n");
+#endif
+}
+
+/* dump the status of all rings on the console */
+void
+rte_ring_list_dump(FILE *f)
+{
+	const struct rte_tailq_entry *te;
+	struct rte_ring_list *ring_list;
+
+	ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	TAILQ_FOREACH(te, ring_list, next) {
+		rte_ring_dump(f, (struct rte_ring *) te->data);
+	}
+
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+}
+
+/* search a ring from its name */
+struct rte_ring *
+rte_ring_lookup(const char *name)
+{
+	struct rte_tailq_entry *te;
+	struct rte_ring *r = NULL;
+	struct rte_ring_list *ring_list;
+
+	ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+
+	rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	TAILQ_FOREACH(te, ring_list, next) {
+		r = (struct rte_ring *) te->data;
+		if (strncmp(name, r->name, RTE_RING_NAMESIZE) == 0)
+			break;
+	}
+
+	rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	if (te == NULL) {
+		rte_errno = ENOENT;
+		return NULL;
+	}
+
+	return r;
+}
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
new file mode 100644
index 00000000..eb45e414
--- /dev/null
+++ b/lib/librte_ring/rte_ring.h
@@ -0,0 +1,1261 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Derived from FreeBSD's bufring.h
+ *
+ **************************************************************************
+ *
+ * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. The name of Kip Macy nor the names of other
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ ***************************************************************************/
+
+#ifndef _RTE_RING_H_
+#define _RTE_RING_H_
+
+/**
+ * @file
+ * RTE Ring
+ *
+ * The Ring Manager is a fixed-size queue, implemented as a table of
+ * pointers. Head and tail pointers are modified atomically, allowing
+ * concurrent access to it. It has the following features:
+ *
+ * - FIFO (First In First Out)
+ * - Maximum size is fixed; the pointers are stored in a table.
+ * - Lockless implementation.
+ * - Multi- or single-consumer dequeue.
+ * - Multi- or single-producer enqueue.
+ * - Bulk dequeue.
+ * - Bulk enqueue.
+ *
+ * Note: the ring implementation is not preemptable. A lcore must not
+ * be interrupted by another task that uses the same ring.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+
+#define RTE_TAILQ_RING_NAME "RTE_RING"
+
+enum rte_ring_queue_behavior {
+	RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */
+	RTE_RING_QUEUE_VARIABLE   /* Enq/Deq as many items a possible from ring */
+};
+
+#ifdef RTE_LIBRTE_RING_DEBUG
+/**
+ * A structure that stores the ring statistics (per-lcore).
+ */
+struct rte_ring_debug_stats {
+	uint64_t enq_success_bulk; /**< Successful enqueues number. */
+	uint64_t enq_success_objs; /**< Objects successfully enqueued. */
+	uint64_t enq_quota_bulk;   /**< Successful enqueues above watermark. */
+	uint64_t enq_quota_objs;   /**< Objects enqueued above watermark. */
+	uint64_t enq_fail_bulk;    /**< Failed enqueues number. */
+	uint64_t enq_fail_objs;    /**< Objects that failed to be enqueued. */
+	uint64_t deq_success_bulk; /**< Successful dequeues number. */
+	uint64_t deq_success_objs; /**< Objects successfully dequeued. */
+	uint64_t deq_fail_bulk;    /**< Failed dequeues number. */
+	uint64_t deq_fail_objs;    /**< Objects that failed to be dequeued. */
+} __rte_cache_aligned;
+#endif
+
+#define RTE_RING_NAMESIZE 32 /**< The maximum length of a ring name. */
+#define RTE_RING_MZ_PREFIX "RG_"
+
+#ifndef RTE_RING_PAUSE_REP_COUNT
+#define RTE_RING_PAUSE_REP_COUNT 0 /**< Yield after pause num of times, no yield
+                                    *   if RTE_RING_PAUSE_REP not defined. */
+#endif
+
+struct rte_memzone; /* forward declaration, so as not to require memzone.h */
+
+/**
+ * An RTE ring structure.
+ *
+ * The producer and the consumer have a head and a tail index. The particularity
+ * of these index is that they are not between 0 and size(ring). These indexes
+ * are between 0 and 2^32, and we mask their value when we access the ring[]
+ * field. Thanks to this assumption, we can do subtractions between 2 index
+ * values in a modulo-32bit base: that's why the overflow of the indexes is not
+ * a problem.
+ */
+struct rte_ring {
+	char name[RTE_RING_NAMESIZE];    /**< Name of the ring. */
+	int flags;                       /**< Flags supplied at creation. */
+	const struct rte_memzone *memzone;
+			/**< Memzone, if any, containing the rte_ring */
+
+	/** Ring producer status. */
+	struct prod {
+		uint32_t watermark;      /**< Maximum items before EDQUOT. */
+		uint32_t sp_enqueue;     /**< True, if single producer. */
+		uint32_t size;           /**< Size of ring. */
+		uint32_t mask;           /**< Mask (size-1) of ring. */
+		volatile uint32_t head;  /**< Producer head. */
+		volatile uint32_t tail;  /**< Producer tail. */
+	} prod __rte_cache_aligned;
+
+	/** Ring consumer status. */
+	struct cons {
+		uint32_t sc_dequeue;     /**< True, if single consumer. */
+		uint32_t size;           /**< Size of the ring. */
+		uint32_t mask;           /**< Mask (size-1) of ring. */
+		volatile uint32_t head;  /**< Consumer head. */
+		volatile uint32_t tail;  /**< Consumer tail. */
+#ifdef RTE_RING_SPLIT_PROD_CONS
+	} cons __rte_cache_aligned;
+#else
+	} cons;
+#endif
+
+#ifdef RTE_LIBRTE_RING_DEBUG
+	struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
+#endif
+
+	void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
+	                                     * not volatile so need to be careful
+	                                     * about compiler re-ordering */
+};
+
+#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
+#define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */
+#define RTE_RING_QUOT_EXCEED (1 << 31)  /**< Quota exceed for burst ops */
+#define RTE_RING_SZ_MASK  (unsigned)(0x0fffffff) /**< Ring size mask */
+
+/**
+ * @internal When debug is enabled, store ring statistics.
+ * @param r
+ *   A pointer to the ring.
+ * @param name
+ *   The name of the statistics field to increment in the ring.
+ * @param n
+ *   The number to add to the object-oriented statistics.
+ */
+#ifdef RTE_LIBRTE_RING_DEBUG
+#define __RING_STAT_ADD(r, name, n) do {                        \
+		unsigned __lcore_id = rte_lcore_id();           \
+		if (__lcore_id < RTE_MAX_LCORE) {               \
+			r->stats[__lcore_id].name##_objs += n;  \
+			r->stats[__lcore_id].name##_bulk += 1;  \
+		}                                               \
+	} while(0)
+#else
+#define __RING_STAT_ADD(r, name, n) do {} while(0)
+#endif
+
+/**
+ * Calculate the memory size needed for a ring
+ *
+ * This function returns the number of bytes needed for a ring, given
+ * the number of elements in it. This value is the sum of the size of
+ * the structure rte_ring and the size of the memory needed by the
+ * objects pointers. The value is aligned to a cache line size.
+ *
+ * @param count
+ *   The number of elements in the ring (must be a power of 2).
+ * @return
+ *   - The memory size needed for the ring on success.
+ *   - -EINVAL if count is not a power of 2.
+ */
+ssize_t rte_ring_get_memsize(unsigned count);
+
+/**
+ * Initialize a ring structure.
+ *
+ * Initialize a ring structure in memory pointed by "r". The size of the
+ * memory area must be large enough to store the ring structure and the
+ * object table. It is advised to use rte_ring_get_memsize() to get the
+ * appropriate size.
+ *
+ * The ring size is set to *count*, which must be a power of two. Water
+ * marking is disabled by default. The real usable ring size is
+ * *count-1* instead of *count* to differentiate a free ring from an
+ * empty ring.
+ *
+ * The ring is not added in RTE_TAILQ_RING global list. Indeed, the
+ * memory given by the caller may not be shareable among dpdk
+ * processes.
+ *
+ * @param r
+ *   The pointer to the ring structure followed by the objects table.
+ * @param name
+ *   The name of the ring.
+ * @param count
+ *   The number of elements in the ring (must be a power of 2).
+ * @param flags
+ *   An OR of the following:
+ *    - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ *      is "single-producer". Otherwise, it is "multi-producers".
+ *    - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ *      is "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ *   0 on success, or a negative value on error.
+ */
+int rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
+	unsigned flags);
+
+/**
+ * Create a new ring named *name* in memory.
+ *
+ * This function uses ``memzone_reserve()`` to allocate memory. Then it
+ * calls rte_ring_init() to initialize an empty ring.
+ *
+ * The new ring size is set to *count*, which must be a power of
+ * two. Water marking is disabled by default. The real usable ring size
+ * is *count-1* instead of *count* to differentiate a free ring from an
+ * empty ring.
+ *
+ * The ring is added in RTE_TAILQ_RING list.
+ *
+ * @param name
+ *   The name of the ring.
+ * @param count
+ *   The size of the ring (must be a power of 2).
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   An OR of the following:
+ *    - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ *      is "single-producer". Otherwise, it is "multi-producers".
+ *    - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ *      is "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ *   On success, the pointer to the new allocated ring. NULL on error with
+ *    rte_errno set appropriately. Possible errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - count provided is not a power of 2
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_ring *rte_ring_create(const char *name, unsigned count,
+				 int socket_id, unsigned flags);
+/**
+ * De-allocate all memory used by the ring.
+ *
+ * @param r
+ *   Ring to free
+ */
+void rte_ring_free(struct rte_ring *r);
+
+/**
+ * Change the high water mark.
+ *
+ * If *count* is 0, water marking is disabled. Otherwise, it is set to the
+ * *count* value. The *count* value must be greater than 0 and less
+ * than the ring size.
+ *
+ * This function can be called at any time (not necessarily at
+ * initialization).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param count
+ *   The new water mark value.
+ * @return
+ *   - 0: Success; water mark changed.
+ *   - -EINVAL: Invalid water mark value.
+ */
+int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
+
+/**
+ * Dump the status of the ring to the console.
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @param r
+ *   A pointer to the ring structure.
+ */
+void rte_ring_dump(FILE *f, const struct rte_ring *r);
+
+/* the actual enqueue of pointers on the ring.
+ * Placed here since identical code needed in both
+ * single and multi producer enqueue functions */
+#define ENQUEUE_PTRS() do { \
+	const uint32_t size = r->prod.size; \
+	uint32_t idx = prod_head & mask; \
+	if (likely(idx + n < size)) { \
+		for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \
+			r->ring[idx] = obj_table[i]; \
+			r->ring[idx+1] = obj_table[i+1]; \
+			r->ring[idx+2] = obj_table[i+2]; \
+			r->ring[idx+3] = obj_table[i+3]; \
+		} \
+		switch (n & 0x3) { \
+			case 3: r->ring[idx++] = obj_table[i++]; \
+			case 2: r->ring[idx++] = obj_table[i++]; \
+			case 1: r->ring[idx++] = obj_table[i++]; \
+		} \
+	} else { \
+		for (i = 0; idx < size; i++, idx++)\
+			r->ring[idx] = obj_table[i]; \
+		for (idx = 0; i < n; i++, idx++) \
+			r->ring[idx] = obj_table[i]; \
+	} \
+} while(0)
+
+/* the actual copy of pointers on the ring to obj_table.
+ * Placed here since identical code needed in both
+ * single and multi consumer dequeue functions */
+#define DEQUEUE_PTRS() do { \
+	uint32_t idx = cons_head & mask; \
+	const uint32_t size = r->cons.size; \
+	if (likely(idx + n < size)) { \
+		for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\
+			obj_table[i] = r->ring[idx]; \
+			obj_table[i+1] = r->ring[idx+1]; \
+			obj_table[i+2] = r->ring[idx+2]; \
+			obj_table[i+3] = r->ring[idx+3]; \
+		} \
+		switch (n & 0x3) { \
+			case 3: obj_table[i++] = r->ring[idx++]; \
+			case 2: obj_table[i++] = r->ring[idx++]; \
+			case 1: obj_table[i++] = r->ring[idx++]; \
+		} \
+	} else { \
+		for (i = 0; idx < size; i++, idx++) \
+			obj_table[i] = r->ring[idx]; \
+		for (idx = 0; i < n; i++, idx++) \
+			obj_table[i] = r->ring[idx]; \
+	} \
+} while (0)
+
+/**
+ * @internal Enqueue several objects on the ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items from a ring
+ *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects enqueue.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects enqueued.
+ */
+static inline int __attribute__((always_inline))
+__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
+			 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t prod_head, prod_next;
+	uint32_t cons_tail, free_entries;
+	const unsigned max = n;
+	int success;
+	unsigned i, rep = 0;
+	uint32_t mask = r->prod.mask;
+	int ret;
+
+	/* Avoid the unnecessary cmpset operation below, which is also
+	 * potentially harmful when n equals 0. */
+	if (n == 0)
+		return 0;
+
+	/* move prod.head atomically */
+	do {
+		/* Reset n to the initial burst count */
+		n = max;
+
+		prod_head = r->prod.head;
+		cons_tail = r->cons.tail;
+		/* The subtraction is done between two unsigned 32bits value
+		 * (the result is always modulo 32 bits even if we have
+		 * prod_head > cons_tail). So 'free_entries' is always between 0
+		 * and size(ring)-1. */
+		free_entries = (mask + cons_tail - prod_head);
+
+		/* check that we have enough room in ring */
+		if (unlikely(n > free_entries)) {
+			if (behavior == RTE_RING_QUEUE_FIXED) {
+				__RING_STAT_ADD(r, enq_fail, n);
+				return -ENOBUFS;
+			}
+			else {
+				/* No free entry available */
+				if (unlikely(free_entries == 0)) {
+					__RING_STAT_ADD(r, enq_fail, n);
+					return 0;
+				}
+
+				n = free_entries;
+			}
+		}
+
+		prod_next = prod_head + n;
+		success = rte_atomic32_cmpset(&r->prod.head, prod_head,
+					      prod_next);
+	} while (unlikely(success == 0));
+
+	/* write entries in ring */
+	ENQUEUE_PTRS();
+	rte_smp_wmb();
+
+	/* if we exceed the watermark */
+	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
+				(int)(n | RTE_RING_QUOT_EXCEED);
+		__RING_STAT_ADD(r, enq_quota, n);
+	}
+	else {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
+		__RING_STAT_ADD(r, enq_success, n);
+	}
+
+	/*
+	 * If there are other enqueues in progress that preceded us,
+	 * we need to wait for them to complete
+	 */
+	while (unlikely(r->prod.tail != prod_head)) {
+		rte_pause();
+
+		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spin too long waiting
+		 * for other thread finish. It gives pre-empted thread a chance
+		 * to proceed and finish with ring dequeue operation. */
+		if (RTE_RING_PAUSE_REP_COUNT &&
+		    ++rep == RTE_RING_PAUSE_REP_COUNT) {
+			rep = 0;
+			sched_yield();
+		}
+	}
+	r->prod.tail = prod_next;
+	return ret;
+}
+
+/**
+ * @internal Enqueue several objects on a ring (NOT multi-producers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items from a ring
+ *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects enqueue.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects enqueued.
+ */
+static inline int __attribute__((always_inline))
+__rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
+			 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t prod_head, cons_tail;
+	uint32_t prod_next, free_entries;
+	unsigned i;
+	uint32_t mask = r->prod.mask;
+	int ret;
+
+	prod_head = r->prod.head;
+	cons_tail = r->cons.tail;
+	/* The subtraction is done between two unsigned 32bits value
+	 * (the result is always modulo 32 bits even if we have
+	 * prod_head > cons_tail). So 'free_entries' is always between 0
+	 * and size(ring)-1. */
+	free_entries = mask + cons_tail - prod_head;
+
+	/* check that we have enough room in ring */
+	if (unlikely(n > free_entries)) {
+		if (behavior == RTE_RING_QUEUE_FIXED) {
+			__RING_STAT_ADD(r, enq_fail, n);
+			return -ENOBUFS;
+		}
+		else {
+			/* No free entry available */
+			if (unlikely(free_entries == 0)) {
+				__RING_STAT_ADD(r, enq_fail, n);
+				return 0;
+			}
+
+			n = free_entries;
+		}
+	}
+
+	prod_next = prod_head + n;
+	r->prod.head = prod_next;
+
+	/* write entries in ring */
+	ENQUEUE_PTRS();
+	rte_smp_wmb();
+
+	/* if we exceed the watermark */
+	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
+			(int)(n | RTE_RING_QUOT_EXCEED);
+		__RING_STAT_ADD(r, enq_quota, n);
+	}
+	else {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
+		__RING_STAT_ADD(r, enq_success, n);
+	}
+
+	r->prod.tail = prod_next;
+	return ret;
+}
+
+/**
+ * @internal Dequeue several objects from a ring (multi-consumers safe). When
+ * the request objects are more than the available objects, only dequeue the
+ * actual number of objects
+ *
+ * This function uses a "compare and set" instruction to move the
+ * consumer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
+ *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects dequeued.
+ */
+
+static inline int __attribute__((always_inline))
+__rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
+		 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t cons_head, prod_tail;
+	uint32_t cons_next, entries;
+	const unsigned max = n;
+	int success;
+	unsigned i, rep = 0;
+	uint32_t mask = r->prod.mask;
+
+	/* Avoid the unnecessary cmpset operation below, which is also
+	 * potentially harmful when n equals 0. */
+	if (n == 0)
+		return 0;
+
+	/* move cons.head atomically */
+	do {
+		/* Restore n as it may change every loop */
+		n = max;
+
+		cons_head = r->cons.head;
+		prod_tail = r->prod.tail;
+		/* The subtraction is done between two unsigned 32bits value
+		 * (the result is always modulo 32 bits even if we have
+		 * cons_head > prod_tail). So 'entries' is always between 0
+		 * and size(ring)-1. */
+		entries = (prod_tail - cons_head);
+
+		/* Set the actual entries for dequeue */
+		if (n > entries) {
+			if (behavior == RTE_RING_QUEUE_FIXED) {
+				__RING_STAT_ADD(r, deq_fail, n);
+				return -ENOENT;
+			}
+			else {
+				if (unlikely(entries == 0)){
+					__RING_STAT_ADD(r, deq_fail, n);
+					return 0;
+				}
+
+				n = entries;
+			}
+		}
+
+		cons_next = cons_head + n;
+		success = rte_atomic32_cmpset(&r->cons.head, cons_head,
+					      cons_next);
+	} while (unlikely(success == 0));
+
+	/* copy in table */
+	DEQUEUE_PTRS();
+	rte_smp_rmb();
+
+	/*
+	 * If there are other dequeues in progress that preceded us,
+	 * we need to wait for them to complete
+	 */
+	while (unlikely(r->cons.tail != cons_head)) {
+		rte_pause();
+
+		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spin too long waiting
+		 * for other thread finish. It gives pre-empted thread a chance
+		 * to proceed and finish with ring dequeue operation. */
+		if (RTE_RING_PAUSE_REP_COUNT &&
+		    ++rep == RTE_RING_PAUSE_REP_COUNT) {
+			rep = 0;
+			sched_yield();
+		}
+	}
+	__RING_STAT_ADD(r, deq_success, n);
+	r->cons.tail = cons_next;
+
+	return behavior == RTE_RING_QUEUE_FIXED ? 0 : n;
+}
+
+/**
+ * @internal Dequeue several objects from a ring (NOT multi-consumers safe).
+ * When the request objects are more than the available objects, only dequeue
+ * the actual number of objects
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
+ *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects dequeued.
+ */
+static inline int __attribute__((always_inline))
+__rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table,
+		 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t cons_head, prod_tail;
+	uint32_t cons_next, entries;
+	unsigned i;
+	uint32_t mask = r->prod.mask;
+
+	cons_head = r->cons.head;
+	prod_tail = r->prod.tail;
+	/* The subtraction is done between two unsigned 32bits value
+	 * (the result is always modulo 32 bits even if we have
+	 * cons_head > prod_tail). So 'entries' is always between 0
+	 * and size(ring)-1. */
+	entries = prod_tail - cons_head;
+
+	if (n > entries) {
+		if (behavior == RTE_RING_QUEUE_FIXED) {
+			__RING_STAT_ADD(r, deq_fail, n);
+			return -ENOENT;
+		}
+		else {
+			if (unlikely(entries == 0)){
+				__RING_STAT_ADD(r, deq_fail, n);
+				return 0;
+			}
+
+			n = entries;
+		}
+	}
+
+	cons_next = cons_head + n;
+	r->cons.head = cons_next;
+
+	/* copy in table */
+	DEQUEUE_PTRS();
+	rte_smp_rmb();
+
+	__RING_STAT_ADD(r, deq_success, n);
+	r->cons.tail = cons_next;
+	return behavior == RTE_RING_QUEUE_FIXED ? 0 : n;
+}
+
+/**
+ * Enqueue several objects on the ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - 0: Success; objects enqueue.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Enqueue several objects on a ring (NOT multi-producers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Enqueue several objects on a ring.
+ *
+ * This function calls the multi-producer or the single-producer
+ * version depending on the default behavior that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
+		      unsigned n)
+{
+	if (r->prod.sp_enqueue)
+		return rte_ring_sp_enqueue_bulk(r, obj_table, n);
+	else
+		return rte_ring_mp_enqueue_bulk(r, obj_table, n);
+}
+
+/**
+ * Enqueue one object on a ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj
+ *   A pointer to the object to be added.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
+{
+	return rte_ring_mp_enqueue_bulk(r, &obj, 1);
+}
+
+/**
+ * Enqueue one object on a ring (NOT multi-producers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj
+ *   A pointer to the object to be added.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sp_enqueue(struct rte_ring *r, void *obj)
+{
+	return rte_ring_sp_enqueue_bulk(r, &obj, 1);
+}
+
+/**
+ * Enqueue one object on a ring.
+ *
+ * This function calls the multi-producer or the single-producer
+ * version, depending on the default behaviour that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj
+ *   A pointer to the object to be added.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_enqueue(struct rte_ring *r, void *obj)
+{
+	if (r->prod.sp_enqueue)
+		return rte_ring_sp_enqueue(r, obj);
+	else
+		return rte_ring_mp_enqueue(r, obj);
+}
+
+/**
+ * Dequeue several objects from a ring (multi-consumers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * consumer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Dequeue several objects from a ring (NOT multi-consumers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table,
+ *   must be strictly positive.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Dequeue several objects from a ring.
+ *
+ * This function calls the multi-consumers or the single-consumer
+ * version, depending on the default behaviour that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	if (r->cons.sc_dequeue)
+		return rte_ring_sc_dequeue_bulk(r, obj_table, n);
+	else
+		return rte_ring_mc_dequeue_bulk(r, obj_table, n);
+}
+
+/**
+ * Dequeue one object from a ring (multi-consumers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * consumer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_p
+ *   A pointer to a void * pointer (object) that will be filled.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)
+{
+	return rte_ring_mc_dequeue_bulk(r, obj_p, 1);
+}
+
+/**
+ * Dequeue one object from a ring (NOT multi-consumers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_p
+ *   A pointer to a void * pointer (object) that will be filled.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)
+{
+	return rte_ring_sc_dequeue_bulk(r, obj_p, 1);
+}
+
+/**
+ * Dequeue one object from a ring.
+ *
+ * This function calls the multi-consumers or the single-consumer
+ * version depending on the default behaviour that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_p
+ *   A pointer to a void * pointer (object) that will be filled.
+ * @return
+ *   - 0: Success, objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_dequeue(struct rte_ring *r, void **obj_p)
+{
+	if (r->cons.sc_dequeue)
+		return rte_ring_sc_dequeue(r, obj_p);
+	else
+		return rte_ring_mc_dequeue(r, obj_p);
+}
+
+/**
+ * Test if a ring is full.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @return
+ *   - 1: The ring is full.
+ *   - 0: The ring is not full.
+ */
+static inline int
+rte_ring_full(const struct rte_ring *r)
+{
+	uint32_t prod_tail = r->prod.tail;
+	uint32_t cons_tail = r->cons.tail;
+	return ((cons_tail - prod_tail - 1) & r->prod.mask) == 0;
+}
+
+/**
+ * Test if a ring is empty.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @return
+ *   - 1: The ring is empty.
+ *   - 0: The ring is not empty.
+ */
+static inline int
+rte_ring_empty(const struct rte_ring *r)
+{
+	uint32_t prod_tail = r->prod.tail;
+	uint32_t cons_tail = r->cons.tail;
+	return !!(cons_tail == prod_tail);
+}
+
+/**
+ * Return the number of entries in a ring.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @return
+ *   The number of entries in the ring.
+ */
+static inline unsigned
+rte_ring_count(const struct rte_ring *r)
+{
+	uint32_t prod_tail = r->prod.tail;
+	uint32_t cons_tail = r->cons.tail;
+	return (prod_tail - cons_tail) & r->prod.mask;
+}
+
+/**
+ * Return the number of free entries in a ring.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @return
+ *   The number of free entries in the ring.
+ */
+static inline unsigned
+rte_ring_free_count(const struct rte_ring *r)
+{
+	uint32_t prod_tail = r->prod.tail;
+	uint32_t cons_tail = r->cons.tail;
+	return (cons_tail - prod_tail - 1) & r->prod.mask;
+}
+
+/**
+ * Dump the status of all rings on the console
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_ring_list_dump(FILE *f);
+
+/**
+ * Search a ring from its name
+ *
+ * @param name
+ *   The name of the ring.
+ * @return
+ *   The pointer to the ring matching the name, or NULL if not found,
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ */
+struct rte_ring *rte_ring_lookup(const char *name);
+
+/**
+ * Enqueue several objects on the ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - n: Actual number of objects enqueued.
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
+}
+
+/**
+ * Enqueue several objects on a ring (NOT multi-producers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - n: Actual number of objects enqueued.
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
+}
+
+/**
+ * Enqueue several objects on a ring.
+ *
+ * This function calls the multi-producer or the single-producer
+ * version depending on the default behavior that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - n: Actual number of objects enqueued.
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
+		      unsigned n)
+{
+	if (r->prod.sp_enqueue)
+		return rte_ring_sp_enqueue_burst(r, obj_table, n);
+	else
+		return rte_ring_mp_enqueue_burst(r, obj_table, n);
+}
+
+/**
+ * Dequeue several objects from a ring (multi-consumers safe). When the request
+ * objects are more than the available objects, only dequeue the actual number
+ * of objects
+ *
+ * This function uses a "compare and set" instruction to move the
+ * consumer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @return
+ *   - n: Actual number of objects dequeued, 0 if ring is empty
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
+}
+
+/**
+ * Dequeue several objects from a ring (NOT multi-consumers safe).When the
+ * request objects are more than the available objects, only dequeue the
+ * actual number of objects
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @return
+ *   - n: Actual number of objects dequeued, 0 if ring is empty
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
+}
+
+/**
+ * Dequeue multiple objects from a ring up to a maximum number.
+ *
+ * This function calls the multi-consumers or the single-consumer
+ * version, depending on the default behaviour that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @return
+ *   - Number of objects dequeued
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	if (r->cons.sc_dequeue)
+		return rte_ring_sc_dequeue_burst(r, obj_table, n);
+	else
+		return rte_ring_mc_dequeue_burst(r, obj_table, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RING_H_ */
diff --git a/lib/librte_ring/rte_ring_version.map b/lib/librte_ring/rte_ring_version.map
new file mode 100644
index 00000000..5474b985
--- /dev/null
+++ b/lib/librte_ring/rte_ring_version.map
@@ -0,0 +1,20 @@
+DPDK_2.0 {
+	global:
+
+	rte_ring_create;
+	rte_ring_dump;
+	rte_ring_get_memsize;
+	rte_ring_init;
+	rte_ring_list_dump;
+	rte_ring_lookup;
+	rte_ring_set_water_mark;
+
+	local: *;
+};
+
+DPDK_2.2 {
+	global:
+
+	rte_ring_free;
+
+} DPDK_2.0;
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
new file mode 100644
index 00000000..a782cd1a
--- /dev/null
+++ b/lib/librte_sched/Makefile
@@ -0,0 +1,65 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_sched.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+CFLAGS_rte_red.o := -D_GNU_SOURCE
+
+LDLIBS += -lm
+LDLIBS += -lrt
+
+EXPORT_MAP := rte_sched_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
+SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c
new file mode 100644
index 00000000..739f37d7
--- /dev/null
+++ b/lib/librte_sched/rte_approx.c
@@ -0,0 +1,196 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+
+#include "rte_approx.h"
+
+/*
+ * Based on paper "Approximating Rational Numbers by Fractions" by Michal
+ * Forisek forisek@dcs.fmph.uniba.sk
+ *
+ * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
+ * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
+ * q is minimal.
+ *
+ * http://people.ksp.sk/~misof/publications/2007approx.pdf
+ */
+
+/* fraction comparison: compare (a/b) and (c/d) */
+static inline uint32_t
+less(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+	return a*d < b*c;
+}
+
+static inline uint32_t
+less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+	return a*d <= b*c;
+}
+
+/* check whether a/b is a valid approximation */
+static inline uint32_t
+matches(uint32_t a, uint32_t b,
+	uint32_t alpha_num, uint32_t d_num, uint32_t denum)
+{
+	if (less_or_equal(a, b, alpha_num - d_num, denum))
+		return 0;
+
+	if (less(a ,b, alpha_num + d_num, denum))
+		return 1;
+
+	return 0;
+}
+
+static inline void
+find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b,
+	uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+	uint32_t k_num = denum * p_b - (alpha_num + d_num) * q_b;
+	uint32_t k_denum = (alpha_num + d_num) * q_a - denum * p_a;
+	uint32_t k = (k_num / k_denum) + 1;
+
+	*p = p_b + k * p_a;
+	*q = q_b + k * q_a;
+}
+
+static inline void
+find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b,
+	uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+	uint32_t k_num = - denum * p_b + (alpha_num - d_num) * q_b;
+	uint32_t k_denum = - (alpha_num - d_num) * q_a + denum * p_a;
+	uint32_t k = (k_num / k_denum) + 1;
+
+	*p = p_b + k * p_a;
+	*q = q_b + k * q_a;
+}
+
+static int
+find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q)
+{
+	uint32_t p_a, q_a, p_b, q_b;
+
+	/* check assumptions on the inputs */
+	if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) {
+		return -1;
+	}
+
+	/* set initial bounds for the search */
+	p_a = 0;
+	q_a = 1;
+	p_b = 1;
+	q_b = 1;
+
+	while (1) {
+		uint32_t new_p_a, new_q_a, new_p_b, new_q_b;
+		uint32_t x_num, x_denum, x;
+		int aa, bb;
+
+		/* compute the number of steps to the left */
+		x_num = denum * p_b - alpha_num * q_b;
+		x_denum = - denum * p_a + alpha_num * q_a;
+		x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+		/* check whether we have a valid approximation */
+		aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+		bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
+		if (aa || bb) {
+			find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
+			return 0;
+		}
+
+		/* update the interval */
+		new_p_a = p_b + (x - 1) * p_a ;
+		new_q_a = q_b + (x - 1) * q_a;
+		new_p_b = p_b + x * p_a ;
+		new_q_b = q_b + x * q_a;
+
+		p_a = new_p_a ;
+		q_a = new_q_a;
+		p_b = new_p_b ;
+		q_b = new_q_b;
+
+		/* compute the number of steps to the right */
+		x_num = alpha_num * q_b - denum * p_b;
+		x_denum = - alpha_num * q_a + denum * p_a;
+		x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */
+
+		/* check whether we have a valid approximation */
+		aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum);
+		bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum);
+		if (aa || bb) {
+			find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q);
+			return 0;
+		 }
+
+		/* update the interval */
+		new_p_a = p_b + (x - 1) * p_a;
+		new_q_a = q_b + (x - 1) * q_a;
+		new_p_b = p_b + x * p_a;
+		new_q_b = q_b + x * q_a;
+
+		p_a = new_p_a;
+		q_a = new_q_a;
+		p_b = new_p_b;
+		q_b = new_q_b;
+	}
+}
+
+int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q)
+{
+	uint32_t alpha_num, d_num, denum;
+
+	/* Check input arguments */
+	if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) {
+		return -1;
+	}
+
+	if ((p == NULL) || (q == NULL)) {
+		return -2;
+	}
+
+	/* Compute alpha_num, d_num and denum */
+	denum = 1;
+	while (d < 1) {
+		alpha *= 10;
+		d *= 10;
+		denum *= 10;
+	}
+	alpha_num = (uint32_t) alpha;
+	d_num = (uint32_t) d;
+
+	/* Perform approximation */
+	return find_best_rational_approximation(alpha_num, d_num, denum, p, q);
+}
diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h
new file mode 100644
index 00000000..09f30a87
--- /dev/null
+++ b/lib/librte_sched/rte_approx.h
@@ -0,0 +1,75 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_APPROX_H__
+#define __INCLUDE_RTE_APPROX_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Rational Approximation
+ *
+ * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal
+ * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and
+ * q is minimal.
+ *
+ ***/
+
+#include <stdint.h>
+
+/**
+ * Find best rational approximation
+ *
+ * @param alpha
+ *   Rational number to approximate
+ * @param d
+ *   Precision for the rational approximation
+ * @param p
+ *   Pointer to pre-allocated space where the numerator of the rational
+ *   approximation will be stored when operation is successful
+ * @param q
+ *   Pointer to pre-allocated space where the denominator of the rational
+ *   approximation will be stored when operation is successful
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_APPROX_H__ */
diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h
new file mode 100644
index 00000000..ff675c58
--- /dev/null
+++ b/lib/librte_sched/rte_bitmap.h
@@ -0,0 +1,560 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_BITMAP_H__
+#define __INCLUDE_RTE_BITMAP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Bitmap
+ *
+ * The bitmap component provides a mechanism to manage large arrays of bits
+ * through bit get/set/clear and bit array scan operations.
+ *
+ * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache
+ * lines. The bitmap is hierarchically organized using two arrays (array1 and
+ * array2), with each bit in array1 being associated with a full cache line
+ * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1
+ * is set only when there is at least one bit set within its associated array2
+ * bits, otherwise the bit in array1 is cleared. The read and write operations
+ * for array1 and array2 are always done in slabs of 64 bits.
+ *
+ * This bitmap is not thread safe. For lock free operation on a specific bitmap
+ * instance, a single writer thread performing bit set/clear operations is
+ * allowed, only the writer thread can do bitmap scan operations, while there
+ * can be several reader threads performing bit get operations in parallel with
+ * the writer thread. When the use of locking primitives is acceptable, the
+ * serialization of the bit set/clear and bitmap scan operations needs to be
+ * enforced by the caller, while the bit get operation does not require locking
+ * the bitmap.
+ *
+ ***/
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+#ifndef RTE_BITMAP_OPTIMIZATIONS
+#define RTE_BITMAP_OPTIMIZATIONS		         1
+#endif
+
+/* Slab */
+#define RTE_BITMAP_SLAB_BIT_SIZE                 64
+#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2            6
+#define RTE_BITMAP_SLAB_BIT_MASK                 (RTE_BITMAP_SLAB_BIT_SIZE - 1)
+
+/* Cache line (CL) */
+#define RTE_BITMAP_CL_BIT_SIZE                   (RTE_CACHE_LINE_SIZE * 8)
+#define RTE_BITMAP_CL_BIT_SIZE_LOG2              (RTE_CACHE_LINE_SIZE_LOG2 + 3)
+#define RTE_BITMAP_CL_BIT_MASK                   (RTE_BITMAP_CL_BIT_SIZE - 1)
+
+#define RTE_BITMAP_CL_SLAB_SIZE                  (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
+#define RTE_BITMAP_CL_SLAB_SIZE_LOG2             (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2)
+#define RTE_BITMAP_CL_SLAB_MASK                  (RTE_BITMAP_CL_SLAB_SIZE - 1)
+
+/** Bitmap data structure */
+struct rte_bitmap {
+	/* Context for array1 and array2 */
+	uint64_t *array1;                        /**< Bitmap array1 */
+	uint64_t *array2;                        /**< Bitmap array2 */
+	uint32_t array1_size;                    /**< Number of 64-bit slabs in array1 that are actually used */
+	uint32_t array2_size;                    /**< Number of 64-bit slabs in array2 */
+
+	/* Context for the "scan next" operation */
+	uint32_t index1;  /**< Bitmap scan: Index of current array1 slab */
+	uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
+	uint32_t index2;  /**< Bitmap scan: Index of current array2 slab */
+	uint32_t go2;     /**< Bitmap scan: Go/stop condition for current array2 cache line */
+
+	/* Storage space for array1 and array2 */
+	uint8_t memory[0];
+};
+
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+	bmp->index1 = (bmp->index1 + 1) & (bmp->array1_size - 1);
+}
+
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+	return (~1lu) << bmp->offset1;
+}
+
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+	bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+}
+
+#if RTE_BITMAP_OPTIMIZATIONS
+
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+	if (likely(slab == 0)) {
+		return 0;
+	}
+
+	*pos = __builtin_ctzll(slab);
+	return 1;
+}
+
+#else
+
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+	uint64_t mask;
+	uint32_t i;
+
+	if (likely(slab == 0)) {
+		return 0;
+	}
+
+	for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) {
+		if (unlikely(slab & mask)) {
+			*pos = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+#endif
+
+static inline uint32_t
+__rte_bitmap_get_memory_footprint(uint32_t n_bits,
+	uint32_t *array1_byte_offset, uint32_t *array1_slabs,
+	uint32_t *array2_byte_offset, uint32_t *array2_slabs)
+{
+	uint32_t n_slabs_context, n_slabs_array1, n_cache_lines_context_and_array1;
+	uint32_t n_cache_lines_array2;
+	uint32_t n_bytes_total;
+
+	n_cache_lines_array2 = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE;
+	n_slabs_array1 = (n_cache_lines_array2 + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE;
+	n_slabs_array1 = rte_align32pow2(n_slabs_array1);
+	n_slabs_context = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+	n_cache_lines_context_and_array1 = (n_slabs_context + n_slabs_array1 + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE;
+	n_bytes_total = (n_cache_lines_context_and_array1 + n_cache_lines_array2) * RTE_CACHE_LINE_SIZE;
+
+	if (array1_byte_offset) {
+		*array1_byte_offset = n_slabs_context * (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+	}
+	if (array1_slabs) {
+		*array1_slabs = n_slabs_array1;
+	}
+	if (array2_byte_offset) {
+		*array2_byte_offset = n_cache_lines_context_and_array1 * RTE_CACHE_LINE_SIZE;
+	}
+	if (array2_slabs) {
+		*array2_slabs = n_cache_lines_array2 * RTE_BITMAP_CL_SLAB_SIZE;
+	}
+
+	return n_bytes_total;
+}
+
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+	bmp->index1 = bmp->array1_size - 1;
+	bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
+	__rte_bitmap_index2_set(bmp);
+	bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+
+	bmp->go2 = 0;
+}
+
+/**
+ * Bitmap memory footprint calculation
+ *
+ * @param n_bits
+ *   Number of bits in the bitmap
+ * @return
+ *   Bitmap memory footprint measured in bytes on success, 0 on error
+ */
+static inline uint32_t
+rte_bitmap_get_memory_footprint(uint32_t n_bits) {
+	/* Check input arguments */
+	if (n_bits == 0) {
+		return 0;
+	}
+
+	return __rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL);
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param mem_size
+ *   Minimum expected size of bitmap.
+ * @param mem
+ *   Base address of array1 and array2.
+ * @param n_bits
+ *   Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @return
+ *   Handle to bitmap instance.
+ */
+static inline struct rte_bitmap *
+rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size)
+{
+	struct rte_bitmap *bmp;
+	uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs;
+	uint32_t size;
+
+	/* Check input arguments */
+	if (n_bits == 0) {
+		return NULL;
+	}
+
+	if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK)) {
+		return NULL;
+	}
+
+	size = __rte_bitmap_get_memory_footprint(n_bits,
+		&array1_byte_offset, &array1_slabs,
+		&array2_byte_offset, &array2_slabs);
+	if (size < mem_size) {
+		return NULL;
+	}
+
+	/* Setup bitmap */
+	memset(mem, 0, size);
+	bmp = (struct rte_bitmap *) mem;
+
+	bmp->array1 = (uint64_t *) &mem[array1_byte_offset];
+	bmp->array1_size = array1_slabs;
+	bmp->array2 = (uint64_t *) &mem[array2_byte_offset];
+	bmp->array2_size = array2_slabs;
+
+	__rte_bitmap_scan_init(bmp);
+
+	return bmp;
+}
+
+/**
+ * Bitmap free
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @return
+ *   0 upon success, error code otherwise
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+	/* Check input arguments */
+	if (bmp == NULL) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+	memset(bmp->array1, 0, bmp->array1_size * sizeof(uint64_t));
+	memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t));
+	__rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ * @return
+ *   0 upon success, error code otherwise
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab2;
+	uint32_t index2;
+
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	slab2 = bmp->array2 + index2;
+	rte_prefetch0((void *) slab2);
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ * @return
+ *   0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab2;
+	uint32_t index2, offset2;
+
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	return (*slab2) & (1lu << offset2);
+}
+
+/**
+ * Bitmap bit set
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1, offset2;
+
+	/* Set bit in array2 slab and set bit in array1 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	slab1 = bmp->array1 + index1;
+
+	*slab2 |= 1lu << offset2;
+	*slab1 |= 1lu << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position identifying the array2 slab
+ * @param slab
+ *   Value to be assigned to the 64-bit slab in array2
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1;
+
+	/* Set bits in array2 slab and set bit in array1 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	slab1 = bmp->array1 + index1;
+
+	*slab2 |= slab;
+	*slab1 |= 1lu << offset1;
+}
+
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+	uint64_t v1, v2, v3, v4;
+
+	v1 = slab2[0] | slab2[1];
+	v2 = slab2[2] | slab2[3];
+	v3 = slab2[4] | slab2[5];
+	v4 = slab2[6] | slab2[7];
+	v1 |= v2;
+	v3 |= v4;
+
+	return v1 | v3;
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1, offset2;
+
+	/* Clear bit in array2 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+
+	/* Return if array2 slab is not all-zeros */
+	*slab2 &= ~(1lu << offset2);
+	if (*slab2){
+		return;
+	}
+
+	/* Check the entire cache line of array2 for all-zeros */
+	index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+	slab2 = bmp->array2 + index2;
+	if (__rte_bitmap_line_not_empty(slab2)) {
+		return;
+	}
+
+	/* The array2 cache line is all-zeros, so clear bit in array1 slab */
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab1 = bmp->array1 + index1;
+	*slab1 &= ~(1lu << offset1);
+
+	return;
+}
+
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+	uint64_t value1;
+	uint32_t i;
+
+	/* Check current array1 slab */
+	value1 = bmp->array1[bmp->index1];
+	value1 &= __rte_bitmap_mask1_get(bmp);
+
+	if (rte_bsf64(value1, &bmp->offset1)) {
+		return 1;
+	}
+
+	__rte_bitmap_index1_inc(bmp);
+	bmp->offset1 = 0;
+
+	/* Look for another array1 slab */
+	for (i = 0; i < bmp->array1_size; i ++, __rte_bitmap_index1_inc(bmp)) {
+		value1 = bmp->array1[bmp->index1];
+
+		if (rte_bsf64(value1, &bmp->offset1)) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+	__rte_bitmap_index2_set(bmp);
+	bmp->go2 = 1;
+	rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8));
+}
+
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+	uint64_t *slab2;
+
+	slab2 = bmp->array2 + bmp->index2;
+	for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) {
+		if (*slab2) {
+			*pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+			*slab = *slab2;
+
+			bmp->index2 ++;
+			slab2 ++;
+			bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   When function call returns 1, pos contains the position of the next set
+ *   bit, otherwise not modified
+ * @param slab
+ *   When function call returns 1, slab contains the value of the entire 64-bit
+ *   slab where the bit indicated by pos is located. Slabs are always 64-bit
+ *   aligned, so the position of the first bit of the slab (this bit is not
+ *   necessarily set) is pos / 64. Once a slab has been returned by the bitmap
+ *   scan operation, the internal pointers of the bitmap are updated to point
+ *   after this slab, so the same slab will not be returned again if it
+ *   contains more than one bit which is set. When function call returns 0,
+ *   slab is not modified.
+ * @return
+ *   0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+	/* Return data from current array2 line if available */
+	if (__rte_bitmap_scan_read(bmp, pos, slab)) {
+		return 1;
+	}
+
+	/* Look for non-empty array2 line */
+	if (__rte_bitmap_scan_search(bmp)) {
+		__rte_bitmap_scan_read_init(bmp);
+		__rte_bitmap_scan_read(bmp, pos, slab);
+		return 1;
+	}
+
+	/* Empty bitmap */
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/lib/librte_sched/rte_reciprocal.c b/lib/librte_sched/rte_reciprocal.c
new file mode 100644
index 00000000..652f0237
--- /dev/null
+++ b/lib/librte_sched/rte_reciprocal.c
@@ -0,0 +1,72 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) Hannes Frederic Sowa
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+
+#include "rte_reciprocal.h"
+
+/* find largest set bit.
+ * portable and slow but does not matter for this usage.
+ */
+static inline int fls(uint32_t x)
+{
+	int b;
+
+	for (b = 31; b >= 0; --b) {
+		if (x & (1u << b))
+			return b + 1;
+	}
+
+	return 0;
+}
+
+struct rte_reciprocal rte_reciprocal_value(uint32_t d)
+{
+	struct rte_reciprocal R;
+	uint64_t m;
+	int l;
+
+	l = fls(d - 1);
+	m = ((1ULL << 32) * ((1ULL << l) - d));
+	m /= d;
+
+	++m;
+	R.m = m;
+	R.sh1 = RTE_MIN(l, 1);
+	R.sh2 = RTE_MAX(l - 1, 0);
+
+	return R;
+}
diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_sched/rte_reciprocal.h
new file mode 100644
index 00000000..abd15251
--- /dev/null
+++ b/lib/librte_sched/rte_reciprocal.h
@@ -0,0 +1,39 @@
+/*
+ * Reciprocal divide
+ *
+ * Used with permission from original authors
+ *  Hannes Frederic Sowa and Daniel Borkmann
+ *
+ * This algorithm is based on the paper "Division by Invariant
+ * Integers Using Multiplication" by Torbjörn Granlund and Peter
+ * L. Montgomery.
+ *
+ * The assembler implementation from Agner Fog, which this code is
+ * based on, can be found here:
+ * http://www.agner.org/optimize/asmlib.zip
+ *
+ * This optimization for A/B is helpful if the divisor B is mostly
+ * runtime invariant. The reciprocal of B is calculated in the
+ * slow-path with reciprocal_value(). The fast-path can then just use
+ * a much faster multiplication operation with a variable dividend A
+ * to calculate the division A/B.
+ */
+
+#ifndef _RTE_RECIPROCAL_H_
+#define _RTE_RECIPROCAL_H_
+
+struct rte_reciprocal {
+	uint32_t m;
+	uint8_t sh1, sh2;
+};
+
+static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
+{
+	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
+
+	return (t + ((a - t) >> R.sh1)) >> R.sh2;
+}
+
+struct rte_reciprocal rte_reciprocal_value(uint32_t d);
+
+#endif /* _RTE_RECIPROCAL_H_ */
diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c
new file mode 100644
index 00000000..fdf40576
--- /dev/null
+++ b/lib/librte_sched/rte_red.c
@@ -0,0 +1,158 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include "rte_red.h"
+#include <rte_random.h>
+#include <rte_common.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+static int rte_red_init_done = 0;     /**< Flag to indicate that global initialisation is done */
+uint32_t rte_red_rand_val = 0;        /**< Random value cache */
+uint32_t rte_red_rand_seed = 0;       /**< Seed for random number generation */
+
+/**
+ * table[i] = log2(1-Wq) * Scale * -1
+ *       Wq = 1/(2^i)
+ */
+uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
+
+/**
+ * table[i] = 2^(i/16) * Scale
+ */
+uint16_t rte_red_pow2_frac_inv[16];
+
+/**
+ * @brief Initialize tables used to compute average
+ *        queue size when queue is empty.
+ */
+static void
+__rte_red_init_tables(void)
+{
+	uint32_t i = 0;
+	double scale = 0.0;
+	double table_size = 0.0;
+
+	scale = (double)(1 << RTE_RED_SCALING);
+	table_size = (double)(RTE_DIM(rte_red_pow2_frac_inv));
+
+	for (i = 0; i < RTE_DIM(rte_red_pow2_frac_inv); i++) {
+		double m = (double)i;
+
+		rte_red_pow2_frac_inv[i] = (uint16_t) round(scale / pow(2, m / table_size));
+	}
+
+	scale = 1024.0;
+
+	RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == RTE_DIM(rte_red_log2_1_minus_Wq));
+
+	for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) {
+		double n = (double)i;
+		double Wq = pow(2, -n);
+		uint32_t index = i - RTE_RED_WQ_LOG2_MIN;
+
+		rte_red_log2_1_minus_Wq[index] = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq));
+		/**
+		* Table entry of zero, corresponds to a Wq of zero
+		* which is not valid (avg would remain constant no
+		* matter how long the queue is empty). So we have
+		* to check for zero and round up to one.
+		*/
+		if (rte_red_log2_1_minus_Wq[index] == 0) {
+			rte_red_log2_1_minus_Wq[index] = 1;
+		}
+	}
+}
+
+int
+rte_red_rt_data_init(struct rte_red *red)
+{
+	if (red == NULL)
+		return -1;
+
+	red->avg = 0;
+	red->count = 0;
+	red->q_time = 0;
+	return 0;
+}
+
+int
+rte_red_config_init(struct rte_red_config *red_cfg,
+	const uint16_t wq_log2,
+	const uint16_t min_th,
+	const uint16_t max_th,
+	const uint16_t maxp_inv)
+{
+	if (red_cfg == NULL) {
+		return -1;
+	}
+	if (max_th > RTE_RED_MAX_TH_MAX) {
+		return -2;
+	}
+	if (min_th >= max_th) {
+		return -3;
+	}
+	if (wq_log2 > RTE_RED_WQ_LOG2_MAX) {
+		return -4;
+	}
+	if (wq_log2 < RTE_RED_WQ_LOG2_MIN) {
+		return -5;
+	}
+	if (maxp_inv < RTE_RED_MAXP_INV_MIN) {
+		return -6;
+	}
+	if (maxp_inv > RTE_RED_MAXP_INV_MAX) {
+		return -7;
+	}
+
+	/**
+	 *  Initialize the RED module if not already done
+	 */
+	if (!rte_red_init_done) {
+		rte_red_rand_seed = rte_rand();
+		rte_red_rand_val = rte_fast_rand();
+		__rte_red_init_tables();
+		rte_red_init_done = 1;
+	}
+
+	red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING);
+	red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING);
+	red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING;
+	red_cfg->maxp_inv = maxp_inv;
+	red_cfg->wq_log2 = wq_log2;
+
+	return 0;
+}
diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h
new file mode 100644
index 00000000..7f9ac901
--- /dev/null
+++ b/lib/librte_sched/rte_red.h
@@ -0,0 +1,453 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __RTE_RED_H_INCLUDED__
+#define __RTE_RED_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Random Early Detection (RED)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+#include <limits.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_branch_prediction.h>
+
+#define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
+#define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
+#define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
+#define RTE_RED_WQ_LOG2_MIN                 1          /**< Min inverse filter weight value */
+#define RTE_RED_WQ_LOG2_MAX                 12         /**< Max inverse filter weight value */
+#define RTE_RED_MAXP_INV_MIN                1          /**< Min inverse mark probability value */
+#define RTE_RED_MAXP_INV_MAX                255        /**< Max inverse mark probability value */
+#define RTE_RED_2POW16                      (1<<16)    /**< 2 power 16 */
+#define RTE_RED_INT16_NBITS                 (sizeof(uint16_t) * CHAR_BIT)
+#define RTE_RED_WQ_LOG2_NUM                 (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
+
+#ifdef RTE_RED_DEBUG
+
+#define RTE_RED_ASSERT(exp)                                      \
+if (!(exp)) {                                                    \
+	rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
+}
+
+#else
+
+#define RTE_RED_ASSERT(exp)                 do { } while(0)
+
+#endif /* RTE_RED_DEBUG */
+
+/**
+ * Externs
+ *
+ */
+extern uint32_t rte_red_rand_val;
+extern uint32_t rte_red_rand_seed;
+extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
+extern uint16_t rte_red_pow2_frac_inv[16];
+
+/**
+ * RED configuration parameters passed by user
+ *
+ */
+struct rte_red_params {
+	uint16_t min_th;   /**< Minimum threshold for queue (max_th) */
+	uint16_t max_th;   /**< Maximum threshold for queue (max_th) */
+	uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
+	uint16_t wq_log2;  /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
+};
+
+/**
+ * RED configuration parameters
+ */
+struct rte_red_config {
+	uint32_t min_th;   /**< min_th scaled in fixed-point format */
+	uint32_t max_th;   /**< max_th scaled in fixed-point format */
+	uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
+	uint8_t maxp_inv;  /**< maxp_inv */
+	uint8_t wq_log2;   /**< wq_log2 */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_red {
+	uint32_t avg;      /**< Average queue size (avg), scaled in fixed-point format */
+	uint32_t count;    /**< Number of packets since last marked packet (count) */
+	uint64_t q_time;   /**< Start of the queue idle time (q_time) */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param red [in,out] data pointer to RED runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_red_rt_data_init(struct rte_red *red);
+
+/**
+ * @brief Configures a single RED configuration parameter structure.
+ *
+ * @param red_cfg [in,out] config pointer to a RED configuration parameter structure
+ * @param wq_log2 [in]  log2 of the filter weight, valid range is:
+ *             RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
+ * @param min_th [in] queue minimum threshold in number of packets
+ * @param max_th [in] queue maximum threshold in number of packets
+ * @param maxp_inv [in] inverse maximum mark probability
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_red_config_init(struct rte_red_config *red_cfg,
+	const uint16_t wq_log2,
+	const uint16_t min_th,
+	const uint16_t max_th,
+	const uint16_t maxp_inv);
+
+/**
+ * @brief Generate random number for RED
+ *
+ * Implemenetation based on:
+ * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
+ *
+ * 10 bit shift has been found through empirical tests (was 16).
+ *
+ * @return Random number between 0 and (2^22 - 1)
+ */
+static inline uint32_t
+rte_fast_rand(void)
+{
+	rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
+	return rte_red_rand_seed >> 10;
+}
+
+/**
+ * @brief calculate factor to scale average queue size when queue
+ *        becomes empty
+ *
+ * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2)
+ * @param m [in] exponent in the computed value (1 - wq) ^ m
+ *
+ * @return computed value
+ * @retval ((1 - wq) ^ m) scaled in fixed-point format
+ */
+static inline uint16_t
+__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
+{
+	uint32_t n = 0;
+	uint32_t f = 0;
+
+	/**
+	 * Basic math tells us that:
+	 *   a^b = 2^(b * log2(a) )
+	 *
+	 * in our case:
+	 *   a = (1-Wq)
+	 *   b = m
+	 *  Wq = 1/ (2^log2n)
+	 *
+	 * So we are computing this equation:
+	 *   factor = 2 ^ ( m * log2(1-Wq))
+	 *
+	 * First we are computing:
+	 *    n = m * log2(1-Wq)
+	 *
+	 * To avoid dealing with signed numbers log2 values are positive
+	 * but they should be negative because (1-Wq) is always < 1.
+	 * Contents of log2 table values are also scaled for precision.
+	 */
+
+	n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
+
+	/**
+	 * The tricky part is computing 2^n, for this I split n into
+	 * integer part and fraction part.
+	 *   f - is fraction part of n
+	 *   n - is integer part of original n
+	 *
+	 * Now using basic math we compute 2^n:
+	 *   2^(f+n) = 2^f * 2^n
+	 *   2^f - we use lookup table
+	 *   2^n - can be replaced with bit shift right oeprations
+	 */
+
+	f = (n >> 6) & 0xf;
+	n >>= 10;
+
+	if (n < RTE_RED_SCALING)
+		return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
+
+	return 0;
+}
+
+/**
+ * @brief Updates queue average in condition when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param red_cfg [in] config pointer to a RED configuration parameter structure
+ * @param red [in,out] data pointer to RED runtime data
+ * @param time [in] current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const uint64_t time)
+{
+	uint64_t time_diff = 0, m = 0;
+
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	red->count ++;
+
+	/**
+	 * We compute avg but we don't compare avg against
+	 *  min_th or max_th, nor calculate drop probability
+	 */
+	time_diff = time - red->q_time;
+
+	/**
+	 * m is the number of packets that might have arrived while the queue was empty.
+	 * In this case we have time stamps provided by scheduler in byte units (bytes
+	 * transmitted on network port). Such time stamp translates into time units as
+	 * port speed is fixed but such approach simplifies the code.
+	 */
+	m = time_diff / RTE_RED_S;
+
+	/**
+	 * Check that m will fit into 16-bit unsigned integer
+	 */
+	if (m >= RTE_RED_2POW16) {
+		red->avg = 0;
+	} else {
+		red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
+	}
+
+	return 0;
+}
+
+/**
+ *  Drop probability (Sally Floyd and Van Jacobson):
+ *
+ *     pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
+ *     pa = pb / (2 - count * pb)
+ *
+ *
+ *                 (1 / maxp_inv) * (avg - min_th)
+ *                ---------------------------------
+ *                         max_th - min_th
+ *     pa = -----------------------------------------------
+ *                count * (1 / maxp_inv) * (avg - min_th)
+ *           2 - -----------------------------------------
+ *                          max_th - min_th
+ *
+ *
+ *                                  avg - min_th
+ *     pa = -----------------------------------------------------------
+ *           2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
+ *
+ *
+ *  We define pa_const as: pa_const =  2 * (max_th - min_th) * maxp_inv. Then:
+ *
+ *
+ *                     avg - min_th
+ *     pa = -----------------------------------
+ *           pa_const - count * (avg - min_th)
+ */
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on mark probability
+ *        criteria
+ *
+ * @param red_cfg [in] config pointer to structure defining RED parameters
+ * @param red [in,out] data pointer to RED runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
+{
+	uint32_t pa_num = 0;    /* numerator of drop-probability */
+	uint32_t pa_den = 0;    /* denominator of drop-probability */
+	uint32_t pa_num_count = 0;
+
+	pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
+
+	pa_num_count = red->count * pa_num;
+
+	if (red_cfg->pa_const <= pa_num_count)
+		return 1;
+
+	pa_den = red_cfg->pa_const - pa_num_count;
+
+	/* If drop, generate and save random number to be used next time */
+	if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
+		rte_red_rand_val = rte_fast_rand();
+
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
+ *
+ * @param red_cfg [in] config pointer to a RED configuration parameter structure
+ * @param red [in,out] data pointer to RED runtime data
+ * @param q [in] current queue size (measured in packets)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	/**
+	* EWMA filter (Sally Floyd and Van Jacobson):
+	*    avg = (1 - wq) * avg + wq * q
+	*    avg = avg + q * wq - avg * wq
+	*
+	* We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
+	*    avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
+	*
+	* By using shift left/right operations, we get:
+	*    avg_s = avg_s + (q << N) - (avg_s >> n)
+	*    avg_s += (q << N) - (avg_s >> n)
+	*/
+
+	/* avg update */
+	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
+
+	/* avg < min_th: do not mark the packet  */
+	if (red->avg < red_cfg->min_th) {
+		red->count ++;
+		return 0;
+	}
+
+	/* min_th <= avg < max_th: mark the packet with pa probability */
+	if (red->avg < red_cfg->max_th) {
+		if (!__rte_red_drop(red_cfg, red)) {
+			red->count ++;
+			return 0;
+		}
+
+		red->count = 0;
+		return 2;
+	}
+
+	/* max_th <= avg: always mark the packet */
+	red->count = 0;
+	return 1;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data based on new queue size value.
+ * Based on new queue average and RED configuration parameters
+ * gives verdict whether to enqueue or drop the packet.
+ *
+ * @param red_cfg [in] config pointer to a RED configuration parameter structure
+ * @param red [in,out] data pointer to RED runtime data
+ * @param q [in] updated queue size in packets
+ * @param time [in] current time stamp
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criteria
+ * @retval 2 drop the packet based on mark probability criteria
+ */
+static inline int
+rte_red_enqueue(const struct rte_red_config *red_cfg,
+	struct rte_red *red,
+	const unsigned q,
+	const uint64_t time)
+{
+	RTE_RED_ASSERT(red_cfg != NULL);
+	RTE_RED_ASSERT(red != NULL);
+
+	if (q != 0) {
+		return rte_red_enqueue_nonempty(red_cfg, red, q);
+	} else {
+		return rte_red_enqueue_empty(red_cfg, red, time);
+	}
+}
+
+/**
+ * @brief Callback to records time that queue became empty
+ *
+ * @param red [in,out] data pointer to RED runtime data
+ * @param time [in] current time stamp
+ */
+static inline void
+rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
+{
+	red->q_time = time;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_RED_H_INCLUDED__ */
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
new file mode 100644
index 00000000..1609ea87
--- /dev/null
+++ b/lib/librte_sched/rte_sched.c
@@ -0,0 +1,2156 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_mbuf.h>
+
+#include "rte_sched.h"
+#include "rte_bitmap.h"
+#include "rte_sched_common.h"
+#include "rte_approx.h"
+#include "rte_reciprocal.h"
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+#ifdef RTE_SCHED_VECTOR
+#include <rte_vect.h>
+
+#if defined(__SSE4__)
+#define SCHED_VECTOR_SSE4
+#endif
+
+#endif
+
+#define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
+#define RTE_SCHED_WRR_SHIFT                   3
+#define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
+#define RTE_SCHED_PIPE_INVALID                UINT32_MAX
+#define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
+
+/* Scaling for cycles_per_byte calculation
+ * Chosen so that minimum rate is 480 bit/sec
+ */
+#define RTE_SCHED_TIME_SHIFT		      8
+
+struct rte_sched_subport {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+	uint32_t tb_credits;
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_period;
+
+	/* TC oversubscription */
+	uint32_t tc_ov_wm;
+	uint32_t tc_ov_wm_min;
+	uint32_t tc_ov_wm_max;
+	uint8_t tc_ov_period_id;
+	uint8_t tc_ov;
+	uint32_t tc_ov_n;
+	double tc_ov_rate;
+
+	/* Statistics */
+	struct rte_sched_subport_stats stats;
+};
+
+struct rte_sched_pipe_profile {
+	/* Token bucket (TB) */
+	uint32_t tb_period;
+	uint32_t tb_credits_per_period;
+	uint32_t tb_size;
+
+	/* Pipe traffic classes */
+	uint32_t tc_period;
+	uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint8_t tc_ov_weight;
+
+	/* Pipe queues */
+	uint8_t  wrr_cost[RTE_SCHED_QUEUES_PER_PIPE];
+};
+
+struct rte_sched_pipe {
+	/* Token bucket (TB) */
+	uint64_t tb_time; /* time of last update */
+	uint32_t tb_credits;
+
+	/* Pipe profile and flags */
+	uint32_t profile;
+
+	/* Traffic classes (TCs) */
+	uint64_t tc_time; /* time of next update */
+	uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	/* Weighted Round Robin (WRR) */
+	uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE];
+
+	/* TC oversubscription */
+	uint32_t tc_ov_credits;
+	uint8_t tc_ov_period_id;
+	uint8_t reserved[3];
+} __rte_cache_aligned;
+
+struct rte_sched_queue {
+	uint16_t qw;
+	uint16_t qr;
+};
+
+struct rte_sched_queue_extra {
+	struct rte_sched_queue_stats stats;
+#ifdef RTE_SCHED_RED
+	struct rte_red red;
+#endif
+};
+
+enum grinder_state {
+	e_GRINDER_PREFETCH_PIPE = 0,
+	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
+	e_GRINDER_PREFETCH_MBUF,
+	e_GRINDER_READ_MBUF
+};
+
+/*
+ * Path through the scheduler hierarchy used by the scheduler enqueue
+ * operation to identify the destination queue for the current
+ * packet. Stored in the field pkt.hash.sched of struct rte_mbuf of
+ * each packet, typically written by the classification stage and read
+ * by scheduler enqueue.
+ */
+struct rte_sched_port_hierarchy {
+	uint16_t queue:2;                /**< Queue ID (0 .. 3) */
+	uint16_t traffic_class:2;        /**< Traffic class ID (0 .. 3)*/
+	uint32_t color:2;                /**< Color */
+	uint16_t unused:10;
+	uint16_t subport;                /**< Subport ID */
+	uint32_t pipe;		         /**< Pipe ID */
+};
+
+struct rte_sched_grinder {
+	/* Pipe cache */
+	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
+	uint32_t pcache_w;
+	uint32_t pcache_r;
+
+	/* Current pipe */
+	enum grinder_state state;
+	uint32_t productive;
+	uint32_t pindex;
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_pipe_profile *pipe_params;
+
+	/* TC cache */
+	uint8_t tccache_qmask[4];
+	uint32_t tccache_qindex[4];
+	uint32_t tccache_w;
+	uint32_t tccache_r;
+
+	/* Current TC */
+	uint32_t tc_index;
+	struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint16_t qsize;
+	uint32_t qmask;
+	uint32_t qpos;
+	struct rte_mbuf *pkt;
+
+	/* WRR */
+	uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+	uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+};
+
+struct rte_sched_port {
+	/* User parameters */
+	uint32_t n_subports_per_port;
+	uint32_t n_pipes_per_subport;
+	uint32_t rate;
+	uint32_t mtu;
+	uint32_t frame_overhead;
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t n_pipe_profiles;
+	uint32_t pipe_tc3_rate_max;
+#ifdef RTE_SCHED_RED
+	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
+#endif
+
+	/* Timing */
+	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles */
+	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
+	uint64_t time;                /* Current NIC TX time measured in bytes */
+	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
+
+	/* Scheduling loop detection */
+	uint32_t pipe_loop;
+	uint32_t pipe_exhaustion;
+
+	/* Bitmap */
+	struct rte_bitmap *bmp;
+	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
+
+	/* Grinders */
+	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
+	uint32_t busy_grinders;
+	struct rte_mbuf **pkts_out;
+	uint32_t n_pkts_out;
+
+	/* Queue base calculation */
+	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
+	uint32_t qsize_sum;
+
+	/* Large data structures */
+	struct rte_sched_subport *subport;
+	struct rte_sched_pipe *pipe;
+	struct rte_sched_queue *queue;
+	struct rte_sched_queue_extra *queue_extra;
+	struct rte_sched_pipe_profile *pipe_profiles;
+	uint8_t *bmp_array;
+	struct rte_mbuf **queue_array;
+	uint8_t memory[0] __rte_cache_aligned;
+} __rte_cache_aligned;
+
+enum rte_sched_port_array {
+	e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0,
+	e_RTE_SCHED_PORT_ARRAY_PIPE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA,
+	e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES,
+	e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY,
+	e_RTE_SCHED_PORT_ARRAY_TOTAL,
+};
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline uint32_t
+rte_sched_port_queues_per_subport(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport;
+}
+
+#endif
+
+static inline uint32_t
+rte_sched_port_queues_per_port(struct rte_sched_port *port)
+{
+	return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port;
+}
+
+static inline struct rte_mbuf **
+rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t pindex = qindex >> 4;
+	uint32_t qpos = qindex & 0xF;
+
+	return (port->queue_array + pindex *
+		port->qsize_sum + port->qsize_add[qpos]);
+}
+
+static inline uint16_t
+rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex)
+{
+	uint32_t tc = (qindex >> 2) & 0x3;
+
+	return port->qsize[tc];
+}
+
+static int
+rte_sched_port_check_params(struct rte_sched_port_params *params)
+{
+	uint32_t i, j;
+
+	if (params == NULL)
+		return -1;
+
+	/* socket */
+	if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES))
+		return -3;
+
+	/* rate */
+	if (params->rate == 0)
+		return -4;
+
+	/* mtu */
+	if (params->mtu == 0)
+		return -5;
+
+	/* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
+	if (params->n_subports_per_port == 0 ||
+	    params->n_subports_per_port > 1u << 16 ||
+	    !rte_is_power_of_2(params->n_subports_per_port))
+		return -6;
+
+	/* n_pipes_per_subport: non-zero, power of 2 */
+	if (params->n_pipes_per_subport == 0 ||
+	    !rte_is_power_of_2(params->n_pipes_per_subport))
+		return -7;
+
+	/* qsize: non-zero, power of 2,
+	 * no bigger than 32K (due to 16-bit read/write pointers)
+	 */
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		uint16_t qsize = params->qsize[i];
+
+		if (qsize == 0 || !rte_is_power_of_2(qsize))
+			return -8;
+	}
+
+	/* pipe_profiles and n_pipe_profiles */
+	if (params->pipe_profiles == NULL ||
+	    params->n_pipe_profiles == 0 ||
+	    params->n_pipe_profiles > RTE_SCHED_PIPE_PROFILES_PER_PORT)
+		return -9;
+
+	for (i = 0; i < params->n_pipe_profiles; i++) {
+		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
+
+		/* TB rate: non-zero, not greater than port rate */
+		if (p->tb_rate == 0 || p->tb_rate > params->rate)
+			return -10;
+
+		/* TB size: non-zero */
+		if (p->tb_size == 0)
+			return -11;
+
+		/* TC rate: non-zero, less than pipe rate */
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+			if (p->tc_rate[j] == 0 || p->tc_rate[j] > p->tb_rate)
+				return -12;
+		}
+
+		/* TC period: non-zero */
+		if (p->tc_period == 0)
+			return -13;
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+		/* TC3 oversubscription weight: non-zero */
+		if (p->tc_ov_weight == 0)
+			return -14;
+#endif
+
+		/* Queue WRR weights: non-zero */
+		for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) {
+			if (p->wrr_weights[j] == 0)
+				return -15;
+		}
+	}
+
+	return 0;
+}
+
+static uint32_t
+rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sched_port_array array)
+{
+	uint32_t n_subports_per_port = params->n_subports_per_port;
+	uint32_t n_pipes_per_subport = params->n_pipes_per_subport;
+	uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port;
+	uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port;
+
+	uint32_t size_subport = n_subports_per_port * sizeof(struct rte_sched_subport);
+	uint32_t size_pipe = n_pipes_per_port * sizeof(struct rte_sched_pipe);
+	uint32_t size_queue = n_queues_per_port * sizeof(struct rte_sched_queue);
+	uint32_t size_queue_extra
+		= n_queues_per_port * sizeof(struct rte_sched_queue_extra);
+	uint32_t size_pipe_profiles
+		= RTE_SCHED_PIPE_PROFILES_PER_PORT * sizeof(struct rte_sched_pipe_profile);
+	uint32_t size_bmp_array = rte_bitmap_get_memory_footprint(n_queues_per_port);
+	uint32_t size_per_pipe_queue_array, size_queue_array;
+
+	uint32_t base, i;
+
+	size_per_pipe_queue_array = 0;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS
+			* params->qsize[i] * sizeof(struct rte_mbuf *);
+	}
+	size_queue_array = n_pipes_per_port * size_per_pipe_queue_array;
+
+	base = 0;
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_SUBPORT)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_subport);
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_PIPE)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_pipe);
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_queue);
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);
+
+	if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY)
+		return base;
+	base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);
+
+	return base;
+}
+
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params)
+{
+	uint32_t size0, size1;
+	int status;
+
+	status = rte_sched_port_check_params(params);
+	if (status != 0) {
+		RTE_LOG(NOTICE, SCHED,
+			"Port scheduler params check failed (%d)\n", status);
+
+		return 0;
+	}
+
+	size0 = sizeof(struct rte_sched_port);
+	size1 = rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_TOTAL);
+
+	return size0 + size1;
+}
+
+static void
+rte_sched_port_config_qsize(struct rte_sched_port *port)
+{
+	/* TC 0 */
+	port->qsize_add[0] = 0;
+	port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
+	port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
+	port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
+
+	/* TC 1 */
+	port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
+	port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
+	port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
+	port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
+
+	/* TC 2 */
+	port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
+	port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
+	port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
+	port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
+
+	/* TC 3 */
+	port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
+	port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
+	port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
+	port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
+
+	port->qsize_sum = port->qsize_add[15] + port->qsize[3];
+}
+
+static void
+rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
+{
+	struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
+
+	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
+		"    Token bucket: period = %u, credits per period = %u, size = %u\n"
+		"    Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
+		"    Traffic class 3 oversubscription: weight = %hhu\n"
+		"    WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
+		i,
+
+		/* Token bucket */
+		p->tb_period,
+		p->tb_credits_per_period,
+		p->tb_size,
+
+		/* Traffic classes */
+		p->tc_period,
+		p->tc_credits_per_period[0],
+		p->tc_credits_per_period[1],
+		p->tc_credits_per_period[2],
+		p->tc_credits_per_period[3],
+
+		/* Traffic class 3 oversubscription */
+		p->tc_ov_weight,
+
+		/* WRR */
+		p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
+		p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
+		p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
+		p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
+}
+
+static inline uint64_t
+rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate)
+{
+	uint64_t time = time_ms;
+
+	time = (time * rate) / 1000;
+
+	return time;
+}
+
+static void
+rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte_sched_port_params *params)
+{
+	uint32_t i, j;
+
+	for (i = 0; i < port->n_pipe_profiles; i++) {
+		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
+		struct rte_sched_pipe_profile *dst = port->pipe_profiles + i;
+
+		/* Token Bucket */
+		if (src->tb_rate == params->rate) {
+			dst->tb_credits_per_period = 1;
+			dst->tb_period = 1;
+		} else {
+			double tb_rate = (double) src->tb_rate
+				/ (double) params->rate;
+			double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
+
+			rte_approx(tb_rate, d,
+				   &dst->tb_credits_per_period, &dst->tb_period);
+		}
+		dst->tb_size = src->tb_size;
+
+		/* Traffic Classes */
+		dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
+							    params->rate);
+
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++)
+			dst->tc_credits_per_period[j]
+				= rte_sched_time_ms_to_bytes(src->tc_period,
+							     src->tc_rate[j]);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+		dst->tc_ov_weight = src->tc_ov_weight;
+#endif
+
+		/* WRR */
+		for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+			uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+			uint32_t lcd, lcd1, lcd2;
+			uint32_t qindex;
+
+			qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+
+			wrr_cost[0] = src->wrr_weights[qindex];
+			wrr_cost[1] = src->wrr_weights[qindex + 1];
+			wrr_cost[2] = src->wrr_weights[qindex + 2];
+			wrr_cost[3] = src->wrr_weights[qindex + 3];
+
+			lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
+			lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
+			lcd = rte_get_lcd(lcd1, lcd2);
+
+			wrr_cost[0] = lcd / wrr_cost[0];
+			wrr_cost[1] = lcd / wrr_cost[1];
+			wrr_cost[2] = lcd / wrr_cost[2];
+			wrr_cost[3] = lcd / wrr_cost[3];
+
+			dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
+			dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
+			dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
+			dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
+		}
+
+		rte_sched_port_log_pipe_profile(port, i);
+	}
+
+	port->pipe_tc3_rate_max = 0;
+	for (i = 0; i < port->n_pipe_profiles; i++) {
+		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
+		uint32_t pipe_tc3_rate = src->tc_rate[3];
+
+		if (port->pipe_tc3_rate_max < pipe_tc3_rate)
+			port->pipe_tc3_rate_max = pipe_tc3_rate;
+	}
+}
+
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params)
+{
+	struct rte_sched_port *port = NULL;
+	uint32_t mem_size, bmp_mem_size, n_queues_per_port, i, cycles_per_byte;
+
+	/* Check user parameters. Determine the amount of memory to allocate */
+	mem_size = rte_sched_port_get_memory_footprint(params);
+	if (mem_size == 0)
+		return NULL;
+
+	/* Allocate memory to store the data structures */
+	port = rte_zmalloc("qos_params", mem_size, RTE_CACHE_LINE_SIZE);
+	if (port == NULL)
+		return NULL;
+
+	/* compile time checks */
+	RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
+	RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS & (RTE_SCHED_PORT_N_GRINDERS - 1));
+
+	/* User parameters */
+	port->n_subports_per_port = params->n_subports_per_port;
+	port->n_pipes_per_subport = params->n_pipes_per_subport;
+	port->rate = params->rate;
+	port->mtu = params->mtu + params->frame_overhead;
+	port->frame_overhead = params->frame_overhead;
+	memcpy(port->qsize, params->qsize, sizeof(params->qsize));
+	port->n_pipe_profiles = params->n_pipe_profiles;
+
+#ifdef RTE_SCHED_RED
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		uint32_t j;
+
+		for (j = 0; j < e_RTE_METER_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->red_params[i][j].min_th |
+			     params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&port->red_config[i][j],
+				params->red_params[i][j].wq_log2,
+				params->red_params[i][j].min_th,
+				params->red_params[i][j].max_th,
+				params->red_params[i][j].maxp_inv) != 0) {
+				return NULL;
+			}
+		}
+	}
+#endif
+
+	/* Timing */
+	port->time_cpu_cycles = rte_get_tsc_cycles();
+	port->time_cpu_bytes = 0;
+	port->time = 0;
+
+	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
+		/ params->rate;
+	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
+
+	/* Scheduling loop detection */
+	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+	port->pipe_exhaustion = 0;
+
+	/* Grinders */
+	port->busy_grinders = 0;
+	port->pkts_out = NULL;
+	port->n_pkts_out = 0;
+
+	/* Queue base calculation */
+	rte_sched_port_config_qsize(port);
+
+	/* Large data structures */
+	port->subport = (struct rte_sched_subport *)
+		(port->memory + rte_sched_port_get_array_base(params,
+							      e_RTE_SCHED_PORT_ARRAY_SUBPORT));
+	port->pipe = (struct rte_sched_pipe *)
+		(port->memory + rte_sched_port_get_array_base(params,
+							      e_RTE_SCHED_PORT_ARRAY_PIPE));
+	port->queue = (struct rte_sched_queue *)
+		(port->memory + rte_sched_port_get_array_base(params,
+							      e_RTE_SCHED_PORT_ARRAY_QUEUE));
+	port->queue_extra = (struct rte_sched_queue_extra *)
+		(port->memory + rte_sched_port_get_array_base(params,
+							      e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA));
+	port->pipe_profiles = (struct rte_sched_pipe_profile *)
+		(port->memory + rte_sched_port_get_array_base(params,
+							      e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES));
+	port->bmp_array =  port->memory
+		+ rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY);
+	port->queue_array = (struct rte_mbuf **)
+		(port->memory + rte_sched_port_get_array_base(params,
+							      e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY));
+
+	/* Pipe profile table */
+	rte_sched_port_config_pipe_profile_table(port, params);
+
+	/* Bitmap */
+	n_queues_per_port = rte_sched_port_queues_per_port(port);
+	bmp_mem_size = rte_bitmap_get_memory_footprint(n_queues_per_port);
+	port->bmp = rte_bitmap_init(n_queues_per_port, port->bmp_array,
+				    bmp_mem_size);
+	if (port->bmp == NULL) {
+		RTE_LOG(ERR, SCHED, "Bitmap init error\n");
+		return NULL;
+	}
+
+	for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
+		port->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
+
+
+	return port;
+}
+
+void
+rte_sched_port_free(struct rte_sched_port *port)
+{
+	unsigned int queue;
+
+	/* Check user parameters */
+	if (port == NULL)
+		return;
+
+	/* Free enqueued mbufs */
+	for (queue = 0; queue < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; queue++) {
+		struct rte_mbuf **mbufs = rte_sched_port_qbase(port, queue);
+		unsigned int i;
+
+		for (i = 0; i < rte_sched_port_qsize(port, queue); i++)
+			rte_pktmbuf_free(mbufs[i]);
+	}
+
+	rte_bitmap_free(port->bmp);
+	rte_free(port);
+}
+
+static void
+rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
+{
+	struct rte_sched_subport *s = port->subport + i;
+
+	RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
+		"    Token bucket: period = %u, credits per period = %u, size = %u\n"
+		"    Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
+		"    Traffic class 3 oversubscription: wm min = %u, wm max = %u\n",
+		i,
+
+		/* Token bucket */
+		s->tb_period,
+		s->tb_credits_per_period,
+		s->tb_size,
+
+		/* Traffic classes */
+		s->tc_period,
+		s->tc_credits_per_period[0],
+		s->tc_credits_per_period[1],
+		s->tc_credits_per_period[2],
+		s->tc_credits_per_period[3],
+
+		/* Traffic class 3 oversubscription */
+		s->tc_ov_wm_min,
+		s->tc_ov_wm_max);
+}
+
+int
+rte_sched_subport_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_params *params)
+{
+	struct rte_sched_subport *s;
+	uint32_t i;
+
+	/* Check user parameters */
+	if (port == NULL ||
+	    subport_id >= port->n_subports_per_port ||
+	    params == NULL)
+		return -1;
+
+	if (params->tb_rate == 0 || params->tb_rate > port->rate)
+		return -2;
+
+	if (params->tb_size == 0)
+		return -3;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->tc_rate[i] == 0 ||
+		    params->tc_rate[i] > params->tb_rate)
+			return -4;
+	}
+
+	if (params->tc_period == 0)
+		return -5;
+
+	s = port->subport + subport_id;
+
+	/* Token Bucket (TB) */
+	if (params->tb_rate == port->rate) {
+		s->tb_credits_per_period = 1;
+		s->tb_period = 1;
+	} else {
+		double tb_rate = ((double) params->tb_rate) / ((double) port->rate);
+		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
+
+		rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period);
+	}
+
+	s->tb_size = params->tb_size;
+	s->tb_time = port->time;
+	s->tb_credits = s->tb_size / 2;
+
+	/* Traffic Classes (TCs) */
+	s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period, port->rate);
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		s->tc_credits_per_period[i]
+			= rte_sched_time_ms_to_bytes(params->tc_period,
+						     params->tc_rate[i]);
+	}
+	s->tc_time = port->time + s->tc_period;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+		s->tc_credits[i] = s->tc_credits_per_period[i];
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	/* TC oversubscription */
+	s->tc_ov_wm_min = port->mtu;
+	s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(params->tc_period,
+						     port->pipe_tc3_rate_max);
+	s->tc_ov_wm = s->tc_ov_wm_max;
+	s->tc_ov_period_id = 0;
+	s->tc_ov = 0;
+	s->tc_ov_n = 0;
+	s->tc_ov_rate = 0;
+#endif
+
+	rte_sched_port_log_subport_config(port, subport_id);
+
+	return 0;
+}
+
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	uint32_t pipe_id,
+	int32_t pipe_profile)
+{
+	struct rte_sched_subport *s;
+	struct rte_sched_pipe *p;
+	struct rte_sched_pipe_profile *params;
+	uint32_t deactivate, profile, i;
+
+	/* Check user parameters */
+	profile = (uint32_t) pipe_profile;
+	deactivate = (pipe_profile < 0);
+
+	if (port == NULL ||
+	    subport_id >= port->n_subports_per_port ||
+	    pipe_id >= port->n_pipes_per_subport ||
+	    (!deactivate && profile >= port->n_pipe_profiles))
+		return -1;
+
+
+	/* Check that subport configuration is valid */
+	s = port->subport + subport_id;
+	if (s->tb_period == 0)
+		return -2;
+
+	p = port->pipe + (subport_id * port->n_pipes_per_subport + pipe_id);
+
+	/* Handle the case when pipe already has a valid configuration */
+	if (p->tb_time) {
+		params = port->pipe_profiles + p->profile;
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+		double subport_tc3_rate = (double) s->tc_credits_per_period[3]
+			/ (double) s->tc_period;
+		double pipe_tc3_rate = (double) params->tc_credits_per_period[3]
+			/ (double) params->tc_period;
+		uint32_t tc3_ov = s->tc_ov;
+
+		/* Unplug pipe from its subport */
+		s->tc_ov_n -= params->tc_ov_weight;
+		s->tc_ov_rate -= pipe_tc3_rate;
+		s->tc_ov = s->tc_ov_rate > subport_tc3_rate;
+
+		if (s->tc_ov != tc3_ov) {
+			RTE_LOG(DEBUG, SCHED,
+				"Subport %u TC3 oversubscription is OFF (%.4lf >= %.4lf)\n",
+				subport_id, subport_tc3_rate, s->tc_ov_rate);
+		}
+#endif
+
+		/* Reset the pipe */
+		memset(p, 0, sizeof(struct rte_sched_pipe));
+	}
+
+	if (deactivate)
+		return 0;
+
+	/* Apply the new pipe configuration */
+	p->profile = profile;
+	params = port->pipe_profiles + p->profile;
+
+	/* Token Bucket (TB) */
+	p->tb_time = port->time;
+	p->tb_credits = params->tb_size / 2;
+
+	/* Traffic Classes (TCs) */
+	p->tc_time = port->time + params->tc_period;
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+		p->tc_credits[i] = params->tc_credits_per_period[i];
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	{
+		/* Subport TC3 oversubscription */
+		double subport_tc3_rate = (double) s->tc_credits_per_period[3]
+			/ (double) s->tc_period;
+		double pipe_tc3_rate = (double) params->tc_credits_per_period[3]
+			/ (double) params->tc_period;
+		uint32_t tc3_ov = s->tc_ov;
+
+		s->tc_ov_n += params->tc_ov_weight;
+		s->tc_ov_rate += pipe_tc3_rate;
+		s->tc_ov = s->tc_ov_rate > subport_tc3_rate;
+
+		if (s->tc_ov != tc3_ov) {
+			RTE_LOG(DEBUG, SCHED,
+				"Subport %u TC3 oversubscription is ON (%.4lf < %.4lf)\n",
+				subport_id, subport_tc3_rate, s->tc_ov_rate);
+		}
+		p->tc_ov_period_id = s->tc_ov_period_id;
+		p->tc_ov_credits = s->tc_ov_wm;
+	}
+#endif
+
+	return 0;
+}
+
+void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt,
+			 uint32_t subport, uint32_t pipe, uint32_t traffic_class,
+			 uint32_t queue, enum rte_meter_color color)
+{
+	struct rte_sched_port_hierarchy *sched
+		= (struct rte_sched_port_hierarchy *) &pkt->hash.sched;
+
+	RTE_BUILD_BUG_ON(sizeof(*sched) > sizeof(pkt->hash.sched));
+
+	sched->color = (uint32_t) color;
+	sched->subport = subport;
+	sched->pipe = pipe;
+	sched->traffic_class = traffic_class;
+	sched->queue = queue;
+}
+
+void
+rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt,
+				  uint32_t *subport, uint32_t *pipe,
+				  uint32_t *traffic_class, uint32_t *queue)
+{
+	const struct rte_sched_port_hierarchy *sched
+		= (const struct rte_sched_port_hierarchy *) &pkt->hash.sched;
+
+	*subport = sched->subport;
+	*pipe = sched->pipe;
+	*traffic_class = sched->traffic_class;
+	*queue = sched->queue;
+}
+
+enum rte_meter_color
+rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
+{
+	const struct rte_sched_port_hierarchy *sched
+		= (const struct rte_sched_port_hierarchy *) &pkt->hash.sched;
+
+	return (enum rte_meter_color) sched->color;
+}
+
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+			     uint32_t subport_id,
+			     struct rte_sched_subport_stats *stats,
+			     uint32_t *tc_ov)
+{
+	struct rte_sched_subport *s;
+
+	/* Check user parameters */
+	if (port == NULL || subport_id >= port->n_subports_per_port ||
+	    stats == NULL || tc_ov == NULL)
+		return -1;
+
+	s = port->subport + subport_id;
+
+	/* Copy subport stats and clear */
+	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
+	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
+
+	/* Subport TC ovesubscription status */
+	*tc_ov = s->tc_ov;
+
+	return 0;
+}
+
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen)
+{
+	struct rte_sched_queue *q;
+	struct rte_sched_queue_extra *qe;
+
+	/* Check user parameters */
+	if ((port == NULL) ||
+	    (queue_id >= rte_sched_port_queues_per_port(port)) ||
+		(stats == NULL) ||
+		(qlen == NULL)) {
+		return -1;
+	}
+	q = port->queue + queue_id;
+	qe = port->queue_extra + queue_id;
+
+	/* Copy queue stats and clear */
+	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
+	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
+
+	/* Queue length */
+	*qlen = q->qw - q->qr;
+
+	return 0;
+}
+
+static inline uint32_t
+rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue)
+{
+	uint32_t result;
+
+	result = subport * port->n_pipes_per_subport + pipe;
+	result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + traffic_class;
+	result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+
+	return result;
+}
+
+#ifdef RTE_SCHED_DEBUG
+
+static inline int
+rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue *queue = port->queue + qindex;
+
+	return queue->qr == queue->qw;
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+#ifdef RTE_SCHED_COLLECT_STATS
+
+static inline void
+rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt_len;
+
+	s->stats.n_pkts_tc[tc_index] += 1;
+	s->stats.n_bytes_tc[tc_index] += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
+					    uint32_t qindex,
+					    struct rte_mbuf *pkt, uint32_t red)
+{
+	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
+	uint32_t tc_index = (qindex >> 2) & 0x3;
+	uint32_t pkt_len = pkt->pkt_len;
+
+	s->stats.n_pkts_tc_dropped[tc_index] += 1;
+	s->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
+#ifdef RTE_SCHED_RED
+	s->stats.n_pkts_red_dropped[tc_index] += red;
+#endif
+}
+
+static inline void
+rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt_len;
+
+	qe->stats.n_pkts += 1;
+	qe->stats.n_bytes += pkt_len;
+}
+
+static inline void
+rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port,
+					  uint32_t qindex,
+					  struct rte_mbuf *pkt, uint32_t red)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	uint32_t pkt_len = pkt->pkt_len;
+
+	qe->stats.n_pkts_dropped += 1;
+	qe->stats.n_bytes_dropped += pkt_len;
+#ifdef RTE_SCHED_RED
+	qe->stats.n_pkts_red_dropped += red;
+#endif
+}
+
+#endif /* RTE_SCHED_COLLECT_STATS */
+
+#ifdef RTE_SCHED_RED
+
+static inline int
+rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_red_config *red_cfg;
+	struct rte_red *red;
+	uint32_t tc_index;
+	enum rte_meter_color color;
+
+	tc_index = (qindex >> 2) & 0x3;
+	color = rte_sched_port_pkt_read_color(pkt);
+	red_cfg = &port->red_config[tc_index][color];
+
+	if ((red_cfg->min_th | red_cfg->max_th) == 0)
+		return 0;
+
+	qe = port->queue_extra + qindex;
+	red = &qe->red;
+
+	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+}
+
+static inline void
+rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex)
+{
+	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
+	struct rte_red *red = &qe->red;
+
+	rte_red_mark_queue_empty(red, port->time);
+}
+
+#else
+
+#define rte_sched_port_red_drop(port, pkt, qindex, qlen)             0
+
+#define rte_sched_port_set_queue_empty_timestamp(port, qindex)
+
+#endif /* RTE_SCHED_RED */
+
+#ifdef RTE_SCHED_DEBUG
+
+static inline void
+debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos,
+		       uint64_t bmp_slab)
+{
+	uint64_t mask;
+	uint32_t i, panic;
+
+	if (bmp_slab == 0)
+		rte_panic("Empty slab at position %u\n", bmp_pos);
+
+	panic = 0;
+	for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
+		if (mask & bmp_slab) {
+			if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) {
+				printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
+				panic = 1;
+			}
+		}
+	}
+
+	if (panic)
+		rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n",
+			bmp_slab, bmp_pos);
+}
+
+#endif /* RTE_SCHED_DEBUG */
+
+static inline uint32_t
+rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_port *port,
+				       struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue *q;
+#ifdef RTE_SCHED_COLLECT_STATS
+	struct rte_sched_queue_extra *qe;
+#endif
+	uint32_t subport, pipe, traffic_class, queue, qindex;
+
+	rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue);
+
+	qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
+	q = port->queue + qindex;
+	rte_prefetch0(q);
+#ifdef RTE_SCHED_COLLECT_STATS
+	qe = port->queue_extra + qindex;
+	rte_prefetch0(qe);
+#endif
+
+	return qindex;
+}
+
+static inline void
+rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
+				     uint32_t qindex, struct rte_mbuf **qbase)
+{
+	struct rte_sched_queue *q;
+	struct rte_mbuf **q_qw;
+	uint16_t qsize;
+
+	q = port->queue + qindex;
+	qsize = rte_sched_port_qsize(port, qindex);
+	q_qw = qbase + (q->qw & (qsize - 1));
+
+	rte_prefetch0(q_qw);
+	rte_bitmap_prefetch0(port->bmp, qindex);
+}
+
+static inline int
+rte_sched_port_enqueue_qwa(struct rte_sched_port *port, uint32_t qindex,
+			   struct rte_mbuf **qbase, struct rte_mbuf *pkt)
+{
+	struct rte_sched_queue *q;
+	uint16_t qsize;
+	uint16_t qlen;
+
+	q = port->queue + qindex;
+	qsize = rte_sched_port_qsize(port, qindex);
+	qlen = q->qw - q->qr;
+
+	/* Drop the packet (and update drop stats) when queue is full */
+	if (unlikely(rte_sched_port_red_drop(port, pkt, qindex, qlen) ||
+		     (qlen >= qsize))) {
+		rte_pktmbuf_free(pkt);
+#ifdef RTE_SCHED_COLLECT_STATS
+		rte_sched_port_update_subport_stats_on_drop(port, qindex, pkt,
+							    qlen < qsize);
+		rte_sched_port_update_queue_stats_on_drop(port, qindex, pkt,
+							  qlen < qsize);
+#endif
+		return 0;
+	}
+
+	/* Enqueue packet */
+	qbase[q->qw & (qsize - 1)] = pkt;
+	q->qw++;
+
+	/* Activate queue in the port bitmap */
+	rte_bitmap_set(port->bmp, qindex);
+
+	/* Statistics */
+#ifdef RTE_SCHED_COLLECT_STATS
+	rte_sched_port_update_subport_stats(port, qindex, pkt);
+	rte_sched_port_update_queue_stats(port, qindex, pkt);
+#endif
+
+	return 1;
+}
+
+
+/*
+ * The enqueue function implements a 4-level pipeline with each stage
+ * processing two different packets. The purpose of using a pipeline
+ * is to hide the latency of prefetching the data structures. The
+ * naming convention is presented in the diagram below:
+ *
+ *   p00  _______   p10  _______   p20  _______   p30  _______
+ * ----->|       |----->|       |----->|       |----->|       |----->
+ *       |   0   |      |   1   |      |   2   |      |   3   |
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ *   p01            p11            p21            p31
+ *
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
+		       uint32_t n_pkts)
+{
+	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
+		*pkt30, *pkt31, *pkt_last;
+	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
+		**q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
+	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
+	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
+	uint32_t result, i;
+
+	result = 0;
+
+	/*
+	 * Less then 6 input packets available, which is not enough to
+	 * feed the pipeline
+	 */
+	if (unlikely(n_pkts < 6)) {
+		struct rte_mbuf **q_base[5];
+		uint32_t q[5];
+
+		/* Prefetch the mbuf structure of each packet */
+		for (i = 0; i < n_pkts; i++)
+			rte_prefetch0(pkts[i]);
+
+		/* Prefetch the queue structure for each queue */
+		for (i = 0; i < n_pkts; i++)
+			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(port,
+								      pkts[i]);
+
+		/* Prefetch the write pointer location of each queue */
+		for (i = 0; i < n_pkts; i++) {
+			q_base[i] = rte_sched_port_qbase(port, q[i]);
+			rte_sched_port_enqueue_qwa_prefetch0(port, q[i],
+							     q_base[i]);
+		}
+
+		/* Write each packet to its queue */
+		for (i = 0; i < n_pkts; i++)
+			result += rte_sched_port_enqueue_qwa(port, q[i],
+							     q_base[i], pkts[i]);
+
+		return result;
+	}
+
+	/* Feed the first 3 stages of the pipeline (6 packets needed) */
+	pkt20 = pkts[0];
+	pkt21 = pkts[1];
+	rte_prefetch0(pkt20);
+	rte_prefetch0(pkt21);
+
+	pkt10 = pkts[2];
+	pkt11 = pkts[3];
+	rte_prefetch0(pkt10);
+	rte_prefetch0(pkt11);
+
+	q20 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt20);
+	q21 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt21);
+
+	pkt00 = pkts[4];
+	pkt01 = pkts[5];
+	rte_prefetch0(pkt00);
+	rte_prefetch0(pkt01);
+
+	q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+	q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+	q20_base = rte_sched_port_qbase(port, q20);
+	q21_base = rte_sched_port_qbase(port, q21);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+	/* Run the pipeline */
+	for (i = 6; i < (n_pkts & (~1)); i += 2) {
+		/* Propagate stage inputs */
+		pkt30 = pkt20;
+		pkt31 = pkt21;
+		pkt20 = pkt10;
+		pkt21 = pkt11;
+		pkt10 = pkt00;
+		pkt11 = pkt01;
+		q30 = q20;
+		q31 = q21;
+		q20 = q10;
+		q21 = q11;
+		q30_base = q20_base;
+		q31_base = q21_base;
+
+		/* Stage 0: Get packets in */
+		pkt00 = pkts[i];
+		pkt01 = pkts[i + 1];
+		rte_prefetch0(pkt00);
+		rte_prefetch0(pkt01);
+
+		/* Stage 1: Prefetch queue structure storing queue pointers */
+		q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10);
+		q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11);
+
+		/* Stage 2: Prefetch queue write location */
+		q20_base = rte_sched_port_qbase(port, q20);
+		q21_base = rte_sched_port_qbase(port, q21);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base);
+		rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base);
+
+		/* Stage 3: Write packet to queue and activate queue */
+		r30 = rte_sched_port_enqueue_qwa(port, q30, q30_base, pkt30);
+		r31 = rte_sched_port_enqueue_qwa(port, q31, q31_base, pkt31);
+		result += r30 + r31;
+	}
+
+	/*
+	 * Drain the pipeline (exactly 6 packets).
+	 * Handle the last packet in the case
+	 * of an odd number of input packets.
+	 */
+	pkt_last = pkts[n_pkts - 1];
+	rte_prefetch0(pkt_last);
+
+	q00 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt00);
+	q01 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt01);
+
+	q10_base = rte_sched_port_qbase(port, q10);
+	q11_base = rte_sched_port_qbase(port, q11);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q10, q10_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q11, q11_base);
+
+	r20 = rte_sched_port_enqueue_qwa(port, q20, q20_base, pkt20);
+	r21 = rte_sched_port_enqueue_qwa(port, q21, q21_base, pkt21);
+	result += r20 + r21;
+
+	q_last = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt_last);
+
+	q00_base = rte_sched_port_qbase(port, q00);
+	q01_base = rte_sched_port_qbase(port, q01);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q00, q00_base);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q01, q01_base);
+
+	r10 = rte_sched_port_enqueue_qwa(port, q10, q10_base, pkt10);
+	r11 = rte_sched_port_enqueue_qwa(port, q11, q11_base, pkt11);
+	result += r10 + r11;
+
+	q_last_base = rte_sched_port_qbase(port, q_last);
+	rte_sched_port_enqueue_qwa_prefetch0(port, q_last, q_last_base);
+
+	r00 = rte_sched_port_enqueue_qwa(port, q00, q00_base, pkt00);
+	r01 = rte_sched_port_enqueue_qwa(port, q01, q01_base, pkt01);
+	result += r00 + r01;
+
+	if (n_pkts & 1) {
+		r_last = rte_sched_port_enqueue_qwa(port, q_last, q_last_base, pkt_last);
+		result += r_last;
+	}
+
+	return result;
+}
+
+#ifndef RTE_SCHED_SUBPORT_TC_OV
+
+static inline void
+grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *params = grinder->pipe_params;
+	uint64_t n_periods;
+
+	/* Subport TB */
+	n_periods = (port->time - subport->tb_time) / subport->tb_period;
+	subport->tb_credits += n_periods * subport->tb_credits_per_period;
+	subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+	subport->tb_time += n_periods * subport->tb_period;
+
+	/* Pipe TB */
+	n_periods = (port->time - pipe->tb_time) / params->tb_period;
+	pipe->tb_credits += n_periods * params->tb_credits_per_period;
+	pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+	pipe->tb_time += n_periods * params->tb_period;
+
+	/* Subport TCs */
+	if (unlikely(port->time >= subport->tc_time)) {
+		subport->tc_credits[0] = subport->tc_credits_per_period[0];
+		subport->tc_credits[1] = subport->tc_credits_per_period[1];
+		subport->tc_credits[2] = subport->tc_credits_per_period[2];
+		subport->tc_credits[3] = subport->tc_credits_per_period[3];
+		subport->tc_time = port->time + subport->tc_period;
+	}
+
+	/* Pipe TCs */
+	if (unlikely(port->time >= pipe->tc_time)) {
+		pipe->tc_credits[0] = params->tc_credits_per_period[0];
+		pipe->tc_credits[1] = params->tc_credits_per_period[1];
+		pipe->tc_credits[2] = params->tc_credits_per_period[2];
+		pipe->tc_credits[3] = params->tc_credits_per_period[3];
+		pipe->tc_time = port->time + params->tc_period;
+	}
+}
+
+#else
+
+static inline uint32_t
+grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	uint32_t tc_ov_consumption_max;
+	uint32_t tc_ov_wm = subport->tc_ov_wm;
+
+	if (subport->tc_ov == 0)
+		return subport->tc_ov_wm_max;
+
+	tc_ov_consumption[0] = subport->tc_credits_per_period[0] - subport->tc_credits[0];
+	tc_ov_consumption[1] = subport->tc_credits_per_period[1] - subport->tc_credits[1];
+	tc_ov_consumption[2] = subport->tc_credits_per_period[2] - subport->tc_credits[2];
+	tc_ov_consumption[3] = subport->tc_credits_per_period[3] - subport->tc_credits[3];
+
+	tc_ov_consumption_max = subport->tc_credits_per_period[3] -
+		(tc_ov_consumption[0] + tc_ov_consumption[1] + tc_ov_consumption[2]);
+
+	if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu)) {
+		tc_ov_wm  -= tc_ov_wm >> 7;
+		if (tc_ov_wm < subport->tc_ov_wm_min)
+			tc_ov_wm = subport->tc_ov_wm_min;
+
+		return tc_ov_wm;
+	}
+
+	tc_ov_wm += (tc_ov_wm >> 7) + 1;
+	if (tc_ov_wm > subport->tc_ov_wm_max)
+		tc_ov_wm = subport->tc_ov_wm_max;
+
+	return tc_ov_wm;
+}
+
+static inline void
+grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *params = grinder->pipe_params;
+	uint64_t n_periods;
+
+	/* Subport TB */
+	n_periods = (port->time - subport->tb_time) / subport->tb_period;
+	subport->tb_credits += n_periods * subport->tb_credits_per_period;
+	subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size);
+	subport->tb_time += n_periods * subport->tb_period;
+
+	/* Pipe TB */
+	n_periods = (port->time - pipe->tb_time) / params->tb_period;
+	pipe->tb_credits += n_periods * params->tb_credits_per_period;
+	pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size);
+	pipe->tb_time += n_periods * params->tb_period;
+
+	/* Subport TCs */
+	if (unlikely(port->time >= subport->tc_time)) {
+		subport->tc_ov_wm = grinder_tc_ov_credits_update(port, pos);
+
+		subport->tc_credits[0] = subport->tc_credits_per_period[0];
+		subport->tc_credits[1] = subport->tc_credits_per_period[1];
+		subport->tc_credits[2] = subport->tc_credits_per_period[2];
+		subport->tc_credits[3] = subport->tc_credits_per_period[3];
+
+		subport->tc_time = port->time + subport->tc_period;
+		subport->tc_ov_period_id++;
+	}
+
+	/* Pipe TCs */
+	if (unlikely(port->time >= pipe->tc_time)) {
+		pipe->tc_credits[0] = params->tc_credits_per_period[0];
+		pipe->tc_credits[1] = params->tc_credits_per_period[1];
+		pipe->tc_credits[2] = params->tc_credits_per_period[2];
+		pipe->tc_credits[3] = params->tc_credits_per_period[3];
+		pipe->tc_time = port->time + params->tc_period;
+	}
+
+	/* Pipe TCs - Oversubscription */
+	if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
+		pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
+
+		pipe->tc_ov_period_id = subport->tc_ov_period_id;
+	}
+}
+
+#endif /* RTE_SCHED_TS_CREDITS_UPDATE, RTE_SCHED_SUBPORT_TC_OV */
+
+
+#ifndef RTE_SCHED_SUBPORT_TC_OV
+
+static inline int
+grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_mbuf *pkt = grinder->pkt;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
+	uint32_t subport_tb_credits = subport->tb_credits;
+	uint32_t subport_tc_credits = subport->tc_credits[tc_index];
+	uint32_t pipe_tb_credits = pipe->tb_credits;
+	uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
+	int enough_credits;
+
+	/* Check queue credits */
+	enough_credits = (pkt_len <= subport_tb_credits) &&
+		(pkt_len <= subport_tc_credits) &&
+		(pkt_len <= pipe_tb_credits) &&
+		(pkt_len <= pipe_tc_credits);
+
+	if (!enough_credits)
+		return 0;
+
+	/* Update port credits */
+	subport->tb_credits -= pkt_len;
+	subport->tc_credits[tc_index] -= pkt_len;
+	pipe->tb_credits -= pkt_len;
+	pipe->tc_credits[tc_index] -= pkt_len;
+
+	return 1;
+}
+
+#else
+
+static inline int
+grinder_credits_check(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_subport *subport = grinder->subport;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_mbuf *pkt = grinder->pkt;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
+	uint32_t subport_tb_credits = subport->tb_credits;
+	uint32_t subport_tc_credits = subport->tc_credits[tc_index];
+	uint32_t pipe_tb_credits = pipe->tb_credits;
+	uint32_t pipe_tc_credits = pipe->tc_credits[tc_index];
+	uint32_t pipe_tc_ov_mask1[] = {UINT32_MAX, UINT32_MAX, UINT32_MAX, pipe->tc_ov_credits};
+	uint32_t pipe_tc_ov_mask2[] = {0, 0, 0, UINT32_MAX};
+	uint32_t pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
+	int enough_credits;
+
+	/* Check pipe and subport credits */
+	enough_credits = (pkt_len <= subport_tb_credits) &&
+		(pkt_len <= subport_tc_credits) &&
+		(pkt_len <= pipe_tb_credits) &&
+		(pkt_len <= pipe_tc_credits) &&
+		(pkt_len <= pipe_tc_ov_credits);
+
+	if (!enough_credits)
+		return 0;
+
+	/* Update pipe and subport credits */
+	subport->tb_credits -= pkt_len;
+	subport->tc_credits[tc_index] -= pkt_len;
+	pipe->tb_credits -= pkt_len;
+	pipe->tc_credits[tc_index] -= pkt_len;
+	pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
+
+	return 1;
+}
+
+#endif /* RTE_SCHED_SUBPORT_TC_OV */
+
+
+static inline int
+grinder_schedule(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	struct rte_mbuf *pkt = grinder->pkt;
+	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
+
+	if (!grinder_credits_check(port, pos))
+		return 0;
+
+	/* Advance port time */
+	port->time += pkt_len;
+
+	/* Send packet */
+	port->pkts_out[port->n_pkts_out++] = pkt;
+	queue->qr++;
+	grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos];
+	if (queue->qr == queue->qw) {
+		uint32_t qindex = grinder->qindex[grinder->qpos];
+
+		rte_bitmap_clear(port->bmp, qindex);
+		grinder->qmask &= ~(1 << grinder->qpos);
+		grinder->wrr_mask[grinder->qpos] = 0;
+		rte_sched_port_set_queue_empty_timestamp(port, qindex);
+	}
+
+	/* Reset pipe loop detection */
+	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+	grinder->productive = 1;
+
+	return 1;
+}
+
+#ifdef SCHED_VECTOR_SSE4
+
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+	__m128i index = _mm_set1_epi32(base_pipe);
+	__m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos);
+	__m128i res = _mm_cmpeq_epi32(pipes, index);
+
+	pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4));
+	pipes = _mm_cmpeq_epi32(pipes, index);
+	res = _mm_or_si128(res, pipes);
+
+	if (_mm_testz_si128(res, res))
+		return 0;
+
+	return 1;
+}
+
+#else
+
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
+		if (port->grinder_base_bmp_pos[i] == base_pipe)
+			return 1;
+	}
+
+	return 0;
+}
+
+#endif /* RTE_SCHED_OPTIMIZATIONS */
+
+static inline void
+grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t w[4];
+
+	grinder->pcache_w = 0;
+	grinder->pcache_r = 0;
+
+	w[0] = (uint16_t) bmp_slab;
+	w[1] = (uint16_t) (bmp_slab >> 16);
+	w[2] = (uint16_t) (bmp_slab >> 32);
+	w[3] = (uint16_t) (bmp_slab >> 48);
+
+	grinder->pcache_qmask[grinder->pcache_w] = w[0];
+	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
+	grinder->pcache_w += (w[0] != 0);
+
+	grinder->pcache_qmask[grinder->pcache_w] = w[1];
+	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
+	grinder->pcache_w += (w[1] != 0);
+
+	grinder->pcache_qmask[grinder->pcache_w] = w[2];
+	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
+	grinder->pcache_w += (w[2] != 0);
+
+	grinder->pcache_qmask[grinder->pcache_w] = w[3];
+	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
+	grinder->pcache_w += (w[3] != 0);
+}
+
+static inline void
+grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint8_t b[4];
+
+	grinder->tccache_w = 0;
+	grinder->tccache_r = 0;
+
+	b[0] = (uint8_t) (qmask & 0xF);
+	b[1] = (uint8_t) ((qmask >> 4) & 0xF);
+	b[2] = (uint8_t) ((qmask >> 8) & 0xF);
+	b[3] = (uint8_t) ((qmask >> 12) & 0xF);
+
+	grinder->tccache_qmask[grinder->tccache_w] = b[0];
+	grinder->tccache_qindex[grinder->tccache_w] = qindex;
+	grinder->tccache_w += (b[0] != 0);
+
+	grinder->tccache_qmask[grinder->tccache_w] = b[1];
+	grinder->tccache_qindex[grinder->tccache_w] = qindex + 4;
+	grinder->tccache_w += (b[1] != 0);
+
+	grinder->tccache_qmask[grinder->tccache_w] = b[2];
+	grinder->tccache_qindex[grinder->tccache_w] = qindex + 8;
+	grinder->tccache_w += (b[2] != 0);
+
+	grinder->tccache_qmask[grinder->tccache_w] = b[3];
+	grinder->tccache_qindex[grinder->tccache_w] = qindex + 12;
+	grinder->tccache_w += (b[3] != 0);
+}
+
+static inline int
+grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_mbuf **qbase;
+	uint32_t qindex;
+	uint16_t qsize;
+
+	if (grinder->tccache_r == grinder->tccache_w)
+		return 0;
+
+	qindex = grinder->tccache_qindex[grinder->tccache_r];
+	qbase = rte_sched_port_qbase(port, qindex);
+	qsize = rte_sched_port_qsize(port, qindex);
+
+	grinder->tc_index = (qindex >> 2) & 0x3;
+	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
+	grinder->qsize = qsize;
+
+	grinder->qindex[0] = qindex;
+	grinder->qindex[1] = qindex + 1;
+	grinder->qindex[2] = qindex + 2;
+	grinder->qindex[3] = qindex + 3;
+
+	grinder->queue[0] = port->queue + qindex;
+	grinder->queue[1] = port->queue + qindex + 1;
+	grinder->queue[2] = port->queue + qindex + 2;
+	grinder->queue[3] = port->queue + qindex + 3;
+
+	grinder->qbase[0] = qbase;
+	grinder->qbase[1] = qbase + qsize;
+	grinder->qbase[2] = qbase + 2 * qsize;
+	grinder->qbase[3] = qbase + 3 * qsize;
+
+	grinder->tccache_r++;
+	return 1;
+}
+
+static inline int
+grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint32_t pipe_qindex;
+	uint16_t pipe_qmask;
+
+	if (grinder->pcache_r < grinder->pcache_w) {
+		pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
+		pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
+		grinder->pcache_r++;
+	} else {
+		uint64_t bmp_slab = 0;
+		uint32_t bmp_pos = 0;
+
+		/* Get another non-empty pipe group */
+		if (unlikely(rte_bitmap_scan(port->bmp, &bmp_pos, &bmp_slab) <= 0))
+			return 0;
+
+#ifdef RTE_SCHED_DEBUG
+		debug_check_queue_slab(port, bmp_pos, bmp_slab);
+#endif
+
+		/* Return if pipe group already in one of the other grinders */
+		port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
+		if (unlikely(grinder_pipe_exists(port, bmp_pos)))
+			return 0;
+
+		port->grinder_base_bmp_pos[pos] = bmp_pos;
+
+		/* Install new pipe group into grinder's pipe cache */
+		grinder_pcache_populate(port, pos, bmp_pos, bmp_slab);
+
+		pipe_qmask = grinder->pcache_qmask[0];
+		pipe_qindex = grinder->pcache_qindex[0];
+		grinder->pcache_r = 1;
+	}
+
+	/* Install new pipe in the grinder */
+	grinder->pindex = pipe_qindex >> 4;
+	grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
+	grinder->pipe = port->pipe + grinder->pindex;
+	grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
+	grinder->productive = 0;
+
+	grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask);
+	grinder_next_tc(port, pos);
+
+	/* Check for pipe exhaustion */
+	if (grinder->pindex == port->pipe_loop) {
+		port->pipe_exhaustion = 1;
+		port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+	}
+
+	return 1;
+}
+
+
+static inline void
+grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t qmask = grinder->qmask;
+	uint32_t qindex;
+
+	qindex = tc_index * 4;
+
+	grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
+	grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+
+	grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
+	grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
+	grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
+	grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
+
+	grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
+	grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
+	grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
+	grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+}
+
+static inline void
+grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	struct rte_sched_pipe *pipe = grinder->pipe;
+	uint32_t tc_index = grinder->tc_index;
+	uint32_t qindex;
+
+	qindex = tc_index * 4;
+
+	pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0])
+		>> RTE_SCHED_WRR_SHIFT;
+	pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1])
+		>> RTE_SCHED_WRR_SHIFT;
+	pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2])
+		>> RTE_SCHED_WRR_SHIFT;
+	pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3])
+		>> RTE_SCHED_WRR_SHIFT;
+}
+
+static inline void
+grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t wrr_tokens_min;
+
+	grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
+	grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
+	grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
+	grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+
+	grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
+	wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
+
+	grinder->wrr_tokens[0] -= wrr_tokens_min;
+	grinder->wrr_tokens[1] -= wrr_tokens_min;
+	grinder->wrr_tokens[2] -= wrr_tokens_min;
+	grinder->wrr_tokens[3] -= wrr_tokens_min;
+}
+
+
+#define grinder_evict(port, pos)
+
+static inline void
+grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+
+	rte_prefetch0(grinder->pipe);
+	rte_prefetch0(grinder->queue[0]);
+}
+
+static inline void
+grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint16_t qsize, qr[4];
+
+	qsize = grinder->qsize;
+	qr[0] = grinder->queue[0]->qr & (qsize - 1);
+	qr[1] = grinder->queue[1]->qr & (qsize - 1);
+	qr[2] = grinder->queue[2]->qr & (qsize - 1);
+	qr[3] = grinder->queue[3]->qr & (qsize - 1);
+
+	rte_prefetch0(grinder->qbase[0] + qr[0]);
+	rte_prefetch0(grinder->qbase[1] + qr[1]);
+
+	grinder_wrr_load(port, pos);
+	grinder_wrr(port, pos);
+
+	rte_prefetch0(grinder->qbase[2] + qr[2]);
+	rte_prefetch0(grinder->qbase[3] + qr[3]);
+}
+
+static inline void
+grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+	uint32_t qpos = grinder->qpos;
+	struct rte_mbuf **qbase = grinder->qbase[qpos];
+	uint16_t qsize = grinder->qsize;
+	uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
+
+	grinder->pkt = qbase[qr];
+	rte_prefetch0(grinder->pkt);
+
+	if (unlikely((qr & 0x7) == 7)) {
+		uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
+
+		rte_prefetch0(qbase + qr_next);
+	}
+}
+
+static inline uint32_t
+grinder_handle(struct rte_sched_port *port, uint32_t pos)
+{
+	struct rte_sched_grinder *grinder = port->grinder + pos;
+
+	switch (grinder->state) {
+	case e_GRINDER_PREFETCH_PIPE:
+	{
+		if (grinder_next_pipe(port, pos)) {
+			grinder_prefetch_pipe(port, pos);
+			port->busy_grinders++;
+
+			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+			return 0;
+		}
+
+		return 0;
+	}
+
+	case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
+	{
+		struct rte_sched_pipe *pipe = grinder->pipe;
+
+		grinder->pipe_params = port->pipe_profiles + pipe->profile;
+		grinder_prefetch_tc_queue_arrays(port, pos);
+		grinder_credits_update(port, pos);
+
+		grinder->state = e_GRINDER_PREFETCH_MBUF;
+		return 0;
+	}
+
+	case e_GRINDER_PREFETCH_MBUF:
+	{
+		grinder_prefetch_mbuf(port, pos);
+
+		grinder->state = e_GRINDER_READ_MBUF;
+		return 0;
+	}
+
+	case e_GRINDER_READ_MBUF:
+	{
+		uint32_t result = 0;
+
+		result = grinder_schedule(port, pos);
+
+		/* Look for next packet within the same TC */
+		if (result && grinder->qmask) {
+			grinder_wrr(port, pos);
+			grinder_prefetch_mbuf(port, pos);
+
+			return 1;
+		}
+		grinder_wrr_store(port, pos);
+
+		/* Look for another active TC within same pipe */
+		if (grinder_next_tc(port, pos)) {
+			grinder_prefetch_tc_queue_arrays(port, pos);
+
+			grinder->state = e_GRINDER_PREFETCH_MBUF;
+			return result;
+		}
+
+		if (grinder->productive == 0 &&
+		    port->pipe_loop == RTE_SCHED_PIPE_INVALID)
+			port->pipe_loop = grinder->pindex;
+
+		grinder_evict(port, pos);
+
+		/* Look for another active pipe */
+		if (grinder_next_pipe(port, pos)) {
+			grinder_prefetch_pipe(port, pos);
+
+			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
+			return result;
+		}
+
+		/* No active pipe found */
+		port->busy_grinders--;
+
+		grinder->state = e_GRINDER_PREFETCH_PIPE;
+		return result;
+	}
+
+	default:
+		rte_panic("Algorithmic error (invalid state)\n");
+		return 0;
+	}
+}
+
+static inline void
+rte_sched_port_time_resync(struct rte_sched_port *port)
+{
+	uint64_t cycles = rte_get_tsc_cycles();
+	uint64_t cycles_diff = cycles - port->time_cpu_cycles;
+	uint64_t bytes_diff;
+
+	/* Compute elapsed time in bytes */
+	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
+					   port->inv_cycles_per_byte);
+
+	/* Advance port time */
+	port->time_cpu_cycles = cycles;
+	port->time_cpu_bytes += bytes_diff;
+	if (port->time < port->time_cpu_bytes)
+		port->time = port->time_cpu_bytes;
+
+	/* Reset pipe loop detection */
+	port->pipe_loop = RTE_SCHED_PIPE_INVALID;
+}
+
+static inline int
+rte_sched_port_exceptions(struct rte_sched_port *port, int second_pass)
+{
+	int exceptions;
+
+	/* Check if any exception flag is set */
+	exceptions = (second_pass && port->busy_grinders == 0) ||
+		(port->pipe_exhaustion == 1);
+
+	/* Clear exception flags */
+	port->pipe_exhaustion = 0;
+
+	return exceptions;
+}
+
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	uint32_t i, count;
+
+	port->pkts_out = pkts;
+	port->n_pkts_out = 0;
+
+	rte_sched_port_time_resync(port);
+
+	/* Take each queue in the grinder one step further */
+	for (i = 0, count = 0; ; i++)  {
+		count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1));
+		if ((count == n_pkts) ||
+		    rte_sched_port_exceptions(port, i >= RTE_SCHED_PORT_N_GRINDERS)) {
+			break;
+		}
+	}
+
+	return count;
+}
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h
new file mode 100644
index 00000000..e9c28172
--- /dev/null
+++ b/lib/librte_sched/rte_sched.h
@@ -0,0 +1,455 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_H__
+#define __INCLUDE_RTE_SCHED_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Hierarchical Scheduler
+ *
+ * The hierarchical scheduler prioritizes the transmission of packets
+ * from different users and traffic classes according to the Service
+ * Level Agreements (SLAs) defined for the current network node.
+ *
+ * The scheduler supports thousands of packet queues grouped under a
+ * 5-level hierarchy:
+ *     1. Port:
+ *           - Typical usage: output Ethernet port;
+ *           - Multiple ports are scheduled in round robin order with
+ *	    equal priority;
+ *     2. Subport:
+ *           - Typical usage: group of users;
+ *           - Traffic shaping using the token bucket algorithm
+ *	    (one bucket per subport);
+ *           - Upper limit enforced per traffic class at subport level;
+ *           - Lower priority traffic classes able to reuse subport
+ *	    bandwidth currently unused by higher priority traffic
+ *	    classes of the same subport;
+ *           - When any subport traffic class is oversubscribed
+ *	    (configuration time event), the usage of subport member
+ *	    pipes with high demand for thattraffic class pipes is
+ *	    truncated to a dynamically adjusted value with no
+ *             impact to low demand pipes;
+ *     3. Pipe:
+ *           - Typical usage: individual user/subscriber;
+ *           - Traffic shaping using the token bucket algorithm
+ *	    (one bucket per pipe);
+ *     4. Traffic class:
+ *           - Traffic classes of the same pipe handled in strict
+ *	    priority order;
+ *           - Upper limit enforced per traffic class at the pipe level;
+ *           - Lower priority traffic classes able to reuse pipe
+ *	    bandwidth currently unused by higher priority traffic
+ *	    classes of the same pipe;
+ *     5. Queue:
+ *           - Typical usage: queue hosting packets from one or
+ *	    multiple connections of same traffic class belonging to
+ *	    the same user;
+ *           - Weighted Round Robin (WRR) is used to service the
+ *	    queues within same pipe traffic class.
+ *
+ */
+
+#include <sys/types.h>
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/** Random Early Detection (RED) */
+#ifdef RTE_SCHED_RED
+#include "rte_red.h"
+#endif
+
+/** Number of traffic classes per pipe (as well as subport).
+ * Cannot be changed.
+ */
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    4
+
+/** Number of queues per pipe traffic class. Cannot be changed. */
+#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS    4
+
+/** Number of queues per pipe. */
+#define RTE_SCHED_QUEUES_PER_PIPE             \
+	(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *     \
+	RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+
+/** Maximum number of pipe profiles that can be defined per port.
+ * Compile-time configurable.
+ */
+#ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT
+#define RTE_SCHED_PIPE_PROFILES_PER_PORT      256
+#endif
+
+/*
+ * Ethernet framing overhead. Overhead fields per Ethernet frame:
+ * 1. Preamble:                             7 bytes;
+ * 2. Start of Frame Delimiter (SFD):       1 byte;
+ * 3. Frame Check Sequence (FCS):           4 bytes;
+ * 4. Inter Frame Gap (IFG):               12 bytes.
+ *
+ * The FCS is considered overhead only if not included in the packet
+ * length (field pkt_len of struct rte_mbuf).
+ */
+#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
+#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
+#endif
+
+/*
+ * Subport configuration parameters. The period and credits_per_period
+ * parameters are measured in bytes, with one byte meaning the time
+ * duration associated with the transmission of one byte on the
+ * physical medium of the output port, with pipe or pipe traffic class
+ * rate (measured as percentage of output port rate) determined as
+ * credits_per_period divided by period. One credit represents one
+ * byte.
+ */
+struct rte_sched_subport_params {
+	/* Subport token bucket */
+	uint32_t tb_rate;                /**< Rate (measured in bytes per second) */
+	uint32_t tb_size;                /**< Size (measured in credits) */
+
+	/* Subport traffic classes */
+	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Traffic class rates (measured in bytes per second) */
+	uint32_t tc_period;
+	/**< Enforcement period for rates (measured in milliseconds) */
+};
+
+/** Subport statistics */
+struct rte_sched_subport_stats {
+	/* Packets */
+	uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Number of packets successfully written */
+	uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Number of packets dropped */
+
+	/* Bytes */
+	uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Number of bytes successfully written for each traffic class */
+	uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Number of bytes dropped for each traffic class */
+
+#ifdef RTE_SCHED_RED
+	uint32_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Number of packets dropped by red */
+#endif
+};
+
+/*
+ * Pipe configuration parameters. The period and credits_per_period
+ * parameters are measured in bytes, with one byte meaning the time
+ * duration associated with the transmission of one byte on the
+ * physical medium of the output port, with pipe or pipe traffic class
+ * rate (measured as percentage of output port rate) determined as
+ * credits_per_period divided by period. One credit represents one
+ * byte.
+ */
+struct rte_sched_pipe_params {
+	/* Pipe token bucket */
+	uint32_t tb_rate;                /**< Rate (measured in bytes per second) */
+	uint32_t tb_size;                /**< Size (measured in credits) */
+
+	/* Pipe traffic classes */
+	uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Traffic class rates (measured in bytes per second) */
+	uint32_t tc_period;
+	/**< Enforcement period (measured in milliseconds) */
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+	uint8_t tc_ov_weight;		 /**< Weight Traffic class 3 oversubscription */
+#endif
+
+	/* Pipe queues */
+	uint8_t  wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights */
+};
+
+/** Queue statistics */
+struct rte_sched_queue_stats {
+	/* Packets */
+	uint32_t n_pkts;                 /**< Packets successfully written */
+	uint32_t n_pkts_dropped;         /**< Packets dropped */
+#ifdef RTE_SCHED_RED
+	uint32_t n_pkts_red_dropped;	 /**< Packets dropped by RED */
+#endif
+
+	/* Bytes */
+	uint32_t n_bytes;                /**< Bytes successfully written */
+	uint32_t n_bytes_dropped;        /**< Bytes dropped */
+};
+
+/** Port configuration parameters. */
+struct rte_sched_port_params {
+	const char *name;                /**< String to be associated */
+	int socket;                      /**< CPU socket ID */
+	uint32_t rate;                   /**< Output port rate
+					  * (measured in bytes per second) */
+	uint32_t mtu;                    /**< Maximum Ethernet frame size
+					  * (measured in bytes).
+					  * Should not include the framing overhead. */
+	uint32_t frame_overhead;         /**< Framing overhead per packet
+					  * (measured in bytes) */
+	uint32_t n_subports_per_port;    /**< Number of subports */
+	uint32_t n_pipes_per_subport;    /**< Number of pipes per subport */
+	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	/**< Packet queue size for each traffic class.
+	 * All queues within the same pipe traffic class have the same
+	 * size. Queues from different pipes serving the same traffic
+	 * class have the same size. */
+	struct rte_sched_pipe_params *pipe_profiles;
+	/**< Pipe profile table.
+	 * Every pipe is configured using one of the profiles from this table. */
+	uint32_t n_pipe_profiles;        /**< Profiles in the pipe profile table */
+#ifdef RTE_SCHED_RED
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters */
+#endif
+};
+
+/*
+ * Configuration
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler port configuration
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Handle to port scheduler instance upon success or NULL otherwise.
+ */
+struct rte_sched_port *
+rte_sched_port_config(struct rte_sched_port_params *params);
+
+/**
+ * Hierarchical scheduler port free
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ */
+void
+rte_sched_port_free(struct rte_sched_port *port);
+
+/**
+ * Hierarchical scheduler subport configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param params
+ *   Subport configuration parameters
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_params *params);
+
+/**
+ * Hierarchical scheduler pipe configuration
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param pipe_id
+ *   Pipe ID within subport
+ * @param pipe_profile
+ *   ID of port-level pre-configured pipe profile
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_pipe_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	uint32_t pipe_id,
+	int32_t pipe_profile);
+
+/**
+ * Hierarchical scheduler memory footprint size per port
+ *
+ * @param params
+ *   Port scheduler configuration parameter structure
+ * @return
+ *   Memory footprint size in bytes upon success, 0 otherwise
+ */
+uint32_t
+rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params);
+
+/*
+ * Statistics
+ *
+ ***/
+
+/**
+ * Hierarchical scheduler subport statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param subport_id
+ *   Subport ID
+ * @param stats
+ *   Pointer to pre-allocated subport statistics structure where the statistics
+ *   counters should be stored
+ * @param tc_ov
+ *   Pointer to pre-allocated 4-entry array where the oversubscription status for
+ *   each of the 4 subport traffic classes should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_subport_read_stats(struct rte_sched_port *port,
+	uint32_t subport_id,
+	struct rte_sched_subport_stats *stats,
+	uint32_t *tc_ov);
+
+/**
+ * Hierarchical scheduler queue statistics read
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param queue_id
+ *   Queue ID within port scheduler
+ * @param stats
+ *   Pointer to pre-allocated subport statistics structure where the statistics
+ *   counters should be stored
+ * @param qlen
+ *   Pointer to pre-allocated variable where the current queue length
+ *   should be stored.
+ * @return
+ *   0 upon success, error code otherwise
+ */
+int
+rte_sched_queue_read_stats(struct rte_sched_port *port,
+	uint32_t queue_id,
+	struct rte_sched_queue_stats *stats,
+	uint16_t *qlen);
+
+/**
+ * Scheduler hierarchy path write to packet descriptor. Typically
+ * called by the packet classification stage.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ * @param color
+ *   Packet color set
+ */
+void
+rte_sched_port_pkt_write(struct rte_mbuf *pkt,
+			 uint32_t subport, uint32_t pipe, uint32_t traffic_class,
+			 uint32_t queue, enum rte_meter_color color);
+
+/**
+ * Scheduler hierarchy path read from packet descriptor (struct
+ * rte_mbuf). Typically called as part of the hierarchical scheduler
+ * enqueue operation. The subport, pipe, traffic class and queue
+ * parameters need to be pre-allocated by the caller.
+ *
+ * @param pkt
+ *   Packet descriptor handle
+ * @param subport
+ *   Subport ID
+ * @param pipe
+ *   Pipe ID within subport
+ * @param traffic_class
+ *   Traffic class ID within pipe (0 .. 3)
+ * @param queue
+ *   Queue ID within pipe traffic class (0 .. 3)
+ *
+ */
+void
+rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt,
+				  uint32_t *subport, uint32_t *pipe,
+				  uint32_t *traffic_class, uint32_t *queue);
+
+enum rte_meter_color
+rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);
+
+/**
+ * Hierarchical scheduler port enqueue. Writes up to n_pkts to port
+ * scheduler and returns the number of packets actually written. For
+ * each packet, the port scheduler queue to write the packet to is
+ * identified by reading the hierarchy path from the packet
+ * descriptor; if the queue is full or congested and the packet is not
+ * written to the queue, then the packet is automatically dropped
+ * without any action required from the caller.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Array storing the packet descriptor handles
+ * @param n_pkts
+ *   Number of packets to enqueue from the pkts array into the port scheduler
+ * @return
+ *   Number of packets successfully enqueued
+ */
+int
+rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+/**
+ * Hierarchical scheduler port dequeue. Reads up to n_pkts from the
+ * port scheduler and stores them in the pkts array and returns the
+ * number of packets actually read.  The pkts array needs to be
+ * pre-allocated by the caller with at least n_pkts entries.
+ *
+ * @param port
+ *   Handle to port scheduler instance
+ * @param pkts
+ *   Pre-allocated packet descriptor array where the packets dequeued
+ *   from the port
+ *   scheduler should be stored
+ * @param n_pkts
+ *   Number of packets to dequeue from the port scheduler
+ * @return
+ *   Number of packets successfully dequeued and placed in the pkts array
+ */
+int
+rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_H__ */
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
new file mode 100644
index 00000000..8920adec
--- /dev/null
+++ b/lib/librte_sched/rte_sched_common.h
@@ -0,0 +1,129 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_SCHED_COMMON_H__
+#define __INCLUDE_RTE_SCHED_COMMON_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#define __rte_aligned_16 __attribute__((__aligned__(16)))
+
+static inline uint32_t
+rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
+{
+	return (x < y)? x : y;
+}
+
+#if 0
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+	uint32_t pos0, pos1;
+
+	pos0 = (x[0] <= x[1])? 0 : 1;
+	pos1 = (x[2] <= x[3])? 2 : 3;
+
+	return (x[pos0] <= x[pos1])? pos0 : pos1;
+}
+
+#else
+
+/* simplified version to remove branches with CMOV instruction */
+static inline uint32_t
+rte_min_pos_4_u16(uint16_t *x)
+{
+	uint32_t pos0 = 0;
+	uint32_t pos1 = 2;
+
+	if (x[1] <= x[0]) pos0 = 1;
+	if (x[3] <= x[2]) pos1 = 3;
+	if (x[pos1] <= x[pos0]) pos0 = pos1;
+
+	return pos0;
+}
+
+#endif
+
+/*
+ * Compute the Greatest Common Divisor (GCD) of two numbers.
+ * This implementation uses Euclid's algorithm:
+ *    gcd(a, 0) = a
+ *    gcd(a, b) = gcd(b, a mod b)
+ *
+ */
+static inline uint32_t
+rte_get_gcd(uint32_t a, uint32_t b)
+{
+	uint32_t c;
+
+	if (a == 0)
+		return b;
+	if (b == 0)
+		return a;
+
+	if (a < b) {
+		c = a;
+		a = b;
+		b = c;
+	}
+
+	while (b != 0) {
+		c = a % b;
+		a = b;
+		b = c;
+	}
+
+	return a;
+}
+
+/*
+ * Compute the Lowest Common Denominator (LCD) of two numbers.
+ * This implementation computes GCD first:
+ *    LCD(a, b) = (a * b) / GCD(a, b)
+ *
+ */
+static inline uint32_t
+rte_get_lcd(uint32_t a, uint32_t b)
+{
+	return (a * b) / rte_get_gcd(a, b);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */
diff --git a/lib/librte_sched/rte_sched_version.map b/lib/librte_sched/rte_sched_version.map
new file mode 100644
index 00000000..3aa159ab
--- /dev/null
+++ b/lib/librte_sched/rte_sched_version.map
@@ -0,0 +1,31 @@
+DPDK_2.0 {
+	global:
+
+	rte_approx;
+	rte_red_config_init;
+	rte_red_log2_1_minus_Wq;
+	rte_red_pow2_frac_inv;
+	rte_red_rand_seed;
+	rte_red_rand_val;
+	rte_red_rt_data_init;
+	rte_sched_pipe_config;
+	rte_sched_port_config;
+	rte_sched_port_dequeue;
+	rte_sched_port_enqueue;
+	rte_sched_port_free;
+	rte_sched_port_get_memory_footprint;
+	rte_sched_queue_read_stats;
+	rte_sched_subport_config;
+	rte_sched_subport_read_stats;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_sched_port_pkt_write;
+	rte_sched_port_pkt_read_tree_path;
+	rte_sched_port_pkt_read_color;
+
+} DPDK_2.0;
diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile
new file mode 100644
index 00000000..7f02af3c
--- /dev/null
+++ b/lib/librte_table/Makefile
@@ -0,0 +1,85 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_table.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_table_version.map
+
+LIBABIVER := 2
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm_ipv6.c
+ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_acl.c
+endif
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key8.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key16.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key32.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_ext.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_lru.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_array.c
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_stub.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_lpm.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_lpm_ipv6.h
+ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h
+endif
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_array.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_stub.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) := lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_port
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_lpm
+ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_acl
+endif
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_hash
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h
new file mode 100644
index 00000000..e87e062d
--- /dev/null
+++ b/lib/librte_table/rte_lru.h
@@ -0,0 +1,213 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_LRU_H__
+#define __INCLUDE_RTE_LRU_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#ifdef __INTEL_COMPILER
+#define GCC_VERSION (0)
+#else
+#define GCC_VERSION (__GNUC__ * 10000+__GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
+#endif
+
+#ifndef RTE_TABLE_HASH_LRU_STRATEGY
+#ifdef __SSE4_2__
+#define RTE_TABLE_HASH_LRU_STRATEGY                        2
+#else /* if no SSE, use simple scalar version */
+#define RTE_TABLE_HASH_LRU_STRATEGY                        1
+#endif
+#endif
+
+#ifndef RTE_ARCH_X86_64
+#undef RTE_TABLE_HASH_LRU_STRATEGY
+#define RTE_TABLE_HASH_LRU_STRATEGY                        1
+#endif
+
+#if (RTE_TABLE_HASH_LRU_STRATEGY < 0) || (RTE_TABLE_HASH_LRU_STRATEGY > 3)
+#error Invalid value for RTE_TABLE_HASH_LRU_STRATEGY
+#endif
+
+#if RTE_TABLE_HASH_LRU_STRATEGY == 0
+
+#define lru_init(bucket)						\
+do									\
+	bucket = bucket;						\
+while (0)
+
+#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU)
+
+#define lru_update(bucket, mru_val)					\
+do {									\
+	bucket = bucket;						\
+	mru_val = mru_val;						\
+} while (0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 1
+
+#define lru_init(bucket)						\
+do									\
+	bucket->lru_list = 0x0000000100020003LLU;			\
+while (0)
+
+#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU)
+
+#define lru_update(bucket, mru_val)					\
+do {									\
+	uint64_t x, pos, x0, x1, x2, mask;				\
+									\
+	x = bucket->lru_list;						\
+									\
+	pos = 4;							\
+	if ((x >> 48) == ((uint64_t) mru_val))				\
+		pos = 3;						\
+									\
+	if (((x >> 32) & 0xFFFFLLU) == ((uint64_t) mru_val))		\
+		pos = 2;						\
+									\
+	if (((x >> 16) & 0xFFFFLLU) == ((uint64_t) mru_val))		\
+		pos = 1;						\
+									\
+	if ((x & 0xFFFFLLU) == ((uint64_t) mru_val))			\
+		pos = 0;						\
+									\
+									\
+	pos <<= 4;							\
+	mask = (~0LLU) << pos;						\
+	x0 = x & (~mask);						\
+	x1 = (x >> 16) & mask;						\
+	x2 = (x << (48 - pos)) & (0xFFFFLLU << 48);			\
+	x = x0 | x1 | x2;						\
+									\
+	if (pos != 64)							\
+		bucket->lru_list = x;					\
+} while (0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 2
+
+#if GCC_VERSION > 40306
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+#define lru_init(bucket)						\
+do									\
+	bucket->lru_list = 0x0000000100020003LLU;			\
+while (0)
+
+#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU)
+
+#define lru_update(bucket, mru_val)					\
+do {									\
+	/* set up the masks for all possible shuffles, depends on pos */\
+	static uint64_t masks[10] = {					\
+		/* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\
+		0x0100070605040302, 0x8080808080808080,			\
+		0x0302070605040100, 0x8080808080808080,			\
+		0x0504070603020100, 0x8080808080808080,			\
+		0x0706050403020100, 0x8080808080808080,			\
+		0x0706050403020100, 0x8080808080808080};		\
+	/* load up one register with repeats of mru-val  */		\
+	uint64_t mru2 = mru_val;					\
+	uint64_t mru3 = mru2 | (mru2 << 16);				\
+	uint64_t lru = bucket->lru_list;				\
+	/* XOR to cause the word we're looking for to go to zero */	\
+	uint64_t mru = lru ^ ((mru3 << 32) | mru3);			\
+	__m128i c = _mm_cvtsi64_si128(mru);				\
+	__m128i b = _mm_cvtsi64_si128(lru);				\
+	/* Find the minimum value (first zero word, if it's in there) */\
+	__m128i d = _mm_minpos_epu16(c);				\
+	/* Second word is the index to found word (first word is the value) */\
+	unsigned pos = _mm_extract_epi16(d, 1);				\
+	/* move the recently used location to top of list */		\
+	__m128i k = _mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\
+	/* Finally, update the original list with the reordered data */	\
+	bucket->lru_list = _mm_extract_epi64(k, 0);			\
+	/* Phwew! */							\
+} while (0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 3
+
+#if GCC_VERSION > 40306
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+#define lru_init(bucket)						\
+do									\
+	bucket->lru_list = ~0LLU;					\
+while (0)
+
+
+static inline int
+f_lru_pos(uint64_t lru_list)
+{
+	__m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list);
+	__m128i min = _mm_minpos_epu16(lst);
+	return _mm_extract_epi16(min, 1);
+}
+#define lru_pos(bucket) f_lru_pos(bucket->lru_list)
+
+#define lru_update(bucket, mru_val)					\
+do {									\
+	const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16,		\
+		0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU};		\
+	const uint64_t decs[] = {0x1000100010001LLU, 0};		\
+	__m128i lru = _mm_cvtsi64_si128(bucket->lru_list);		\
+	__m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]);		\
+	lru = _mm_subs_epu16(lru, vdec);				\
+	bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val];	\
+} while (0)
+
+#else
+
+#error "Incorrect value for RTE_TABLE_HASH_LRU_STRATEGY"
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table.h b/lib/librte_table/rte_table.h
new file mode 100644
index 00000000..d3446a56
--- /dev/null
+++ b/lib/librte_table/rte_table.h
@@ -0,0 +1,301 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_H__
+#define __INCLUDE_RTE_TABLE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table
+ *
+ * This tool is part of the DPDK Packet Framework tool suite and provides
+ * a standard interface to implement different types of lookup tables for data
+ * plane processing.
+ *
+ * Virtually any search algorithm that can uniquely associate data to a lookup
+ * key can be fitted under this lookup table abstraction. For the flow table
+ * use-case, the lookup key is an n-tuple of packet fields that uniquely
+ * identifies a traffic flow, while data represents actions and action
+ * meta-data associated with the same traffic flow.
+ *
+ ***/
+
+#include <stdint.h>
+#include <rte_port.h>
+
+struct rte_mbuf;
+
+/** Lookup table statistics */
+struct rte_table_stats {
+	uint64_t n_pkts_in;
+	uint64_t n_pkts_lookup_miss;
+};
+
+/**
+ * Lookup table create
+ *
+ * @param params
+ *   Parameters for lookup table creation. The underlying data structure is
+ *   different for each lookup table type.
+ * @param socket_id
+ *   CPU socket ID (e.g. for memory allocation purpose)
+ * @param entry_size
+ *   Data size of each lookup table entry (measured in bytes)
+ * @return
+ *   Handle to lookup table instance
+ */
+typedef void* (*rte_table_op_create)(void *params, int socket_id,
+	uint32_t entry_size);
+
+/**
+ * Lookup table free
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_free)(void *table);
+
+/**
+ * Lookup table entry add
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param key
+ *   Lookup key
+ * @param entry
+ *   Data to be associated with the current key. This parameter has to point to
+ *   a valid memory buffer where the first entry_size bytes (table create
+ *   parameter) are populated with the data.
+ * @param key_found
+ *   After successful invocation, *key_found is set to a value different than 0
+ *   if the current key is already present in the table and to 0 if not. This
+ *   pointer has to be set to a valid memory location before the table entry add
+ *   function is called.
+ * @param entry_ptr
+ *   After successful invocation, *entry_ptr stores the handle to the table
+ *   entry containing the data associated with the current key. This handle can
+ *   be used to perform further read-write accesses to this entry. This handle
+ *   is valid until the key is deleted from the table or the same key is
+ *   re-added to the table, typically to associate it with different data. This
+ *   pointer has to be set to a valid memory location before the function is
+ *   called.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_add)(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr);
+
+/**
+ * Lookup table entry delete
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param key
+ *   Lookup key
+ * @param key_found
+ *   After successful invocation, *key_found is set to a value different than 0
+ *   if the current key was present in the table before the delete operation
+ *   was performed and to 0 if not. This pointer has to be set to a valid
+ *   memory location before the table entry delete function is called.
+ * @param entry
+ *   After successful invocation, if the key is found in the table (*key found
+ *   is different than 0 after function call is completed) and entry points to
+ *   a valid buffer (entry is set to a value different than NULL before the
+ *   function is called), then the first entry_size bytes (table create
+ *   parameter) in *entry store a copy of table entry that contained the data
+ *   associated with the current key before the key was deleted.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_delete)(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry);
+
+/**
+ * Lookup table entry add bulk
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param key
+ *   Array containing lookup keys
+ * @param entries
+ *   Array containing data to be associated with each key. Every item in the
+ *   array has to point to a valid memory buffer where the first entry_size
+ *   bytes (table create parameter) are populated with the data.
+ * @param n_keys
+ *   Number of keys to add
+ * @param key_found
+ *   After successful invocation, key_found for every item in the array is set
+ *   to a value different than 0 if the current key is already present in the
+ *   table and to 0 if not. This pointer has to be set to a valid memory
+ *   location before the table entry add function is called.
+ * @param entries_ptr
+ *   After successful invocation, array *entries_ptr stores the handle to the
+ *   table entry containing the data associated with every key. This handle can
+ *   be used to perform further read-write accesses to this entry. This handle
+ *   is valid until the key is deleted from the table or the same key is
+ *   re-added to the table, typically to associate it with different data. This
+ *   pointer has to be set to a valid memory location before the function is
+ *   called.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_add_bulk)(
+	void *table,
+	void **keys,
+	void **entries,
+	uint32_t n_keys,
+	int *key_found,
+	void **entries_ptr);
+
+/**
+ * Lookup table entry delete bulk
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param key
+ *   Array containing lookup keys
+ * @param n_keys
+ *   Number of keys to delete
+ * @param key_found
+ *   After successful invocation, key_found for every item in the array is set
+ *   to a value different than 0if the current key was present in the table
+ *   before the delete operation was performed and to 0 if not. This pointer
+ *   has to be set to a valid memory location before the table entry delete
+ *   function is called.
+ * @param entries
+ *   If entries pointer is NULL, this pointer is ignored for every entry found.
+ *   Else, after successful invocation, if specific key is found in the table
+ *   (key_found is different than 0 for this item after function call is
+ *   completed) and item of entry array points to a valid buffer (entry is set
+ *   to a value different than NULL before the function is called), then the
+ *   first entry_size bytes (table create parameter) in *entry store a copy of
+ *   table entry that contained the data associated with the current key before
+ *   the key was deleted.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_delete_bulk)(
+	void *table,
+	void **keys,
+	uint32_t n_keys,
+	int *key_found,
+	void **entries);
+
+/**
+ * Lookup table lookup
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param pkts_mask
+ *   64-bit bitmask specifying which packets in the input burst are valid. When
+ *   pkts_mask bit n is set, then element n of pkts array is pointing to a
+ *   valid packet. Otherwise, element n of pkts array does not point to a valid
+ *   packet, therefore it will not be accessed.
+ * @param lookup_hit_mask
+ *   Once the table lookup operation is completed, this 64-bit bitmask
+ *   specifies which of the valid packets in the input burst resulted in lookup
+ *   hit. For each valid input packet (pkts_mask bit n is set), the following
+ *   are true on lookup hit: lookup_hit_mask bit n is set, element n of entries
+ *   array is valid and it points to the lookup table entry that was hit. For
+ *   each valid input packet (pkts_mask bit n is set), the following are true
+ *   on lookup miss: lookup_hit_mask bit n is not set and element n of entries
+ *   array is not valid.
+ * @param entries
+ *   Once the table lookup operation is completed, this array provides the
+ *   lookup table entries that were hit, as described above. It is required
+ *   that this array is always pre-allocated by the caller of this function
+ *   with exactly 64 elements. The implementation is allowed to speculatively
+ *   modify the elements of this array, so elements marked as invalid in
+ *   lookup_hit_mask once the table lookup operation is completed might have
+ *   been modified by this function.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_lookup)(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries);
+
+/**
+ * Lookup table stats read
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param stats
+ *   Handle to table stats struct to copy data
+ * @param clear
+ *   Flag indicating that stats should be cleared after read
+ *
+ * @return
+ *   Error code or 0 on success.
+ */
+typedef int (*rte_table_op_stats_read)(
+	void *table,
+	struct rte_table_stats *stats,
+	int clear);
+
+/** Lookup table interface defining the lookup table operation */
+struct rte_table_ops {
+	rte_table_op_create f_create;                 /**< Create */
+	rte_table_op_free f_free;                     /**< Free */
+	rte_table_op_entry_add f_add;                 /**< Entry add */
+	rte_table_op_entry_delete f_delete;           /**< Entry delete */
+	rte_table_op_entry_add_bulk f_add_bulk;       /**< Add entry bulk */
+	rte_table_op_entry_delete_bulk f_delete_bulk; /**< Delete entry bulk */
+	rte_table_op_lookup f_lookup;                 /**< Lookup */
+	rte_table_op_stats_read f_stats;              /**< Stats */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_acl.c b/lib/librte_table/rte_table_acl.c
new file mode 100644
index 00000000..c1eb8488
--- /dev/null
+++ b/lib/librte_table/rte_table_acl.c
@@ -0,0 +1,835 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_acl.h"
+#include <rte_ether.h>
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_ACL_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_ACL_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_table_acl {
+	struct rte_table_stats stats;
+
+	/* Low-level ACL table */
+	char name[2][RTE_ACL_NAMESIZE];
+	struct rte_acl_param acl_params; /* for creating low level acl table */
+	struct rte_acl_config cfg; /* Holds the field definitions (metadata) */
+	struct rte_acl_ctx *ctx;
+	uint32_t name_id;
+
+	/* Input parameters */
+	uint32_t n_rules;
+	uint32_t entry_size;
+
+	/* Internal tables */
+	uint8_t *action_table;
+	struct rte_acl_rule **acl_rule_list; /* Array of pointers to rules */
+	uint8_t *acl_rule_memory; /* Memory to store the rules */
+
+	/* Memory to store the action table and stack of free entries */
+	uint8_t memory[0] __rte_cache_aligned;
+};
+
+
+static void *
+rte_table_acl_create(
+	void *params,
+	int socket_id,
+	uint32_t entry_size)
+{
+	struct rte_table_acl_params *p = (struct rte_table_acl_params *) params;
+	struct rte_table_acl *acl;
+	uint32_t action_table_size, acl_rule_list_size, acl_rule_memory_size;
+	uint32_t total_size;
+
+	RTE_BUILD_BUG_ON(((sizeof(struct rte_table_acl) % RTE_CACHE_LINE_SIZE)
+		!= 0));
+
+	/* Check input parameters */
+	if (p == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid value for params\n", __func__);
+		return NULL;
+	}
+	if (p->name == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid value for name\n", __func__);
+		return NULL;
+	}
+	if (p->n_rules == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid value for n_rules\n",
+			__func__);
+		return NULL;
+	}
+	if ((p->n_rule_fields == 0) ||
+	    (p->n_rule_fields > RTE_ACL_MAX_FIELDS)) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid value for n_rule_fields\n",
+			__func__);
+		return NULL;
+	}
+
+	entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t));
+
+	/* Memory allocation */
+	action_table_size = RTE_CACHE_LINE_ROUNDUP(p->n_rules * entry_size);
+	acl_rule_list_size =
+		RTE_CACHE_LINE_ROUNDUP(p->n_rules * sizeof(struct rte_acl_rule *));
+	acl_rule_memory_size = RTE_CACHE_LINE_ROUNDUP(p->n_rules *
+		RTE_ACL_RULE_SZ(p->n_rule_fields));
+	total_size = sizeof(struct rte_table_acl) + action_table_size +
+		acl_rule_list_size + acl_rule_memory_size;
+
+	acl = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE,
+		socket_id);
+	if (acl == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for ACL table\n",
+			__func__, total_size);
+		return NULL;
+	}
+
+	acl->action_table = &acl->memory[0];
+	acl->acl_rule_list =
+		(struct rte_acl_rule **) &acl->memory[action_table_size];
+	acl->acl_rule_memory = (uint8_t *)
+		&acl->memory[action_table_size + acl_rule_list_size];
+
+	/* Initialization of internal fields */
+	snprintf(acl->name[0], RTE_ACL_NAMESIZE, "%s_a", p->name);
+	snprintf(acl->name[1], RTE_ACL_NAMESIZE, "%s_b", p->name);
+	acl->name_id = 1;
+
+	acl->acl_params.name = acl->name[acl->name_id];
+	acl->acl_params.socket_id = socket_id;
+	acl->acl_params.rule_size = RTE_ACL_RULE_SZ(p->n_rule_fields);
+	acl->acl_params.max_rule_num = p->n_rules;
+
+	acl->cfg.num_categories = 1;
+	acl->cfg.num_fields = p->n_rule_fields;
+	memcpy(&acl->cfg.defs[0], &p->field_format[0],
+		p->n_rule_fields * sizeof(struct rte_acl_field_def));
+
+	acl->ctx = NULL;
+
+	acl->n_rules = p->n_rules;
+	acl->entry_size = entry_size;
+
+	return acl;
+}
+
+static int
+rte_table_acl_free(void *table)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+
+	/* Check input parameters */
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Free previously allocated resources */
+	if (acl->ctx != NULL)
+		rte_acl_free(acl->ctx);
+
+	rte_free(acl);
+
+	return 0;
+}
+
+RTE_ACL_RULE_DEF(rte_pipeline_acl_rule, RTE_ACL_MAX_FIELDS);
+
+static int
+rte_table_acl_build(struct rte_table_acl *acl, struct rte_acl_ctx **acl_ctx)
+{
+	struct rte_acl_ctx *ctx = NULL;
+	uint32_t n_rules, i;
+	int status;
+
+	/* Create low level ACL table */
+	ctx = rte_acl_create(&acl->acl_params);
+	if (ctx == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: Cannot create low level ACL table\n",
+			__func__);
+		return -1;
+	}
+
+	/* Add rules to low level ACL table */
+	n_rules = 0;
+	for (i = 1; i < acl->n_rules; i++) {
+		if (acl->acl_rule_list[i] != NULL) {
+			status = rte_acl_add_rules(ctx, acl->acl_rule_list[i],
+				1);
+			if (status != 0) {
+				RTE_LOG(ERR, TABLE,
+				"%s: Cannot add rule to low level ACL table\n",
+					__func__);
+				rte_acl_free(ctx);
+				return -1;
+			}
+
+			n_rules++;
+		}
+	}
+
+	if (n_rules == 0) {
+		rte_acl_free(ctx);
+		*acl_ctx = NULL;
+		return 0;
+	}
+
+	/* Build low level ACl table */
+	status = rte_acl_build(ctx, &acl->cfg);
+	if (status != 0) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot build the low level ACL table\n",
+			__func__);
+		rte_acl_free(ctx);
+		return -1;
+	}
+
+	rte_acl_dump(ctx);
+
+	*acl_ctx = ctx;
+	return 0;
+}
+
+static int
+rte_table_acl_entry_add(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+	struct rte_table_acl_rule_add_params *rule =
+		(struct rte_table_acl_rule_add_params *) key;
+	struct rte_pipeline_acl_rule acl_rule;
+	struct rte_acl_rule *rule_location;
+	struct rte_acl_ctx *ctx;
+	uint32_t free_pos, free_pos_valid, i;
+	int status;
+
+	/* Check input parameters */
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (entry == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (entry_ptr == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry_ptr parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (rule->priority > RTE_ACL_MAX_PRIORITY) {
+		RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Setup rule data structure */
+	memset(&acl_rule, 0, sizeof(acl_rule));
+	acl_rule.data.category_mask = 1;
+	acl_rule.data.priority = RTE_ACL_MAX_PRIORITY - rule->priority;
+	acl_rule.data.userdata = 0; /* To be set up later */
+	memcpy(&acl_rule.field[0],
+		&rule->field_value[0],
+		acl->cfg.num_fields * sizeof(struct rte_acl_field));
+
+	/* Look to see if the rule exists already in the table */
+	free_pos = 0;
+	free_pos_valid = 0;
+	for (i = 1; i < acl->n_rules; i++) {
+		if (acl->acl_rule_list[i] == NULL) {
+			if (free_pos_valid == 0) {
+				free_pos = i;
+				free_pos_valid = 1;
+			}
+
+			continue;
+		}
+
+		/* Compare the key fields */
+		status = memcmp(&acl->acl_rule_list[i]->field[0],
+			&rule->field_value[0],
+			acl->cfg.num_fields * sizeof(struct rte_acl_field));
+
+		/* Rule found: update data associated with the rule */
+		if (status == 0) {
+			*key_found = 1;
+			*entry_ptr = &acl->memory[i * acl->entry_size];
+			memcpy(*entry_ptr, entry, acl->entry_size);
+
+			return 0;
+		}
+	}
+
+	/* Return if max rules */
+	if (free_pos_valid == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Max number of rules reached\n",
+			__func__);
+		return -ENOSPC;
+	}
+
+	/* Add the new rule to the rule set */
+	acl_rule.data.userdata = free_pos;
+	rule_location = (struct rte_acl_rule *)
+		&acl->acl_rule_memory[free_pos * acl->acl_params.rule_size];
+	memcpy(rule_location, &acl_rule, acl->acl_params.rule_size);
+	acl->acl_rule_list[free_pos] = rule_location;
+
+	/* Build low level ACL table */
+	acl->name_id ^= 1;
+	acl->acl_params.name = acl->name[acl->name_id];
+	status = rte_table_acl_build(acl, &ctx);
+	if (status != 0) {
+		/* Roll back changes */
+		acl->acl_rule_list[free_pos] = NULL;
+		acl->name_id ^= 1;
+
+		return -EINVAL;
+	}
+
+	/* Commit changes */
+	if (acl->ctx != NULL)
+		rte_acl_free(acl->ctx);
+	acl->ctx = ctx;
+	*key_found = 0;
+	*entry_ptr = &acl->memory[free_pos * acl->entry_size];
+	memcpy(*entry_ptr, entry, acl->entry_size);
+
+	return 0;
+}
+
+static int
+rte_table_acl_entry_delete(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+	struct rte_table_acl_rule_delete_params *rule =
+		(struct rte_table_acl_rule_delete_params *) key;
+	struct rte_acl_rule *deleted_rule = NULL;
+	struct rte_acl_ctx *ctx;
+	uint32_t pos, pos_valid, i;
+	int status;
+
+	/* Check input parameters */
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Look for the rule in the table */
+	pos = 0;
+	pos_valid = 0;
+	for (i = 1; i < acl->n_rules; i++) {
+		if (acl->acl_rule_list[i] != NULL) {
+			/* Compare the key fields */
+			status = memcmp(&acl->acl_rule_list[i]->field[0],
+				&rule->field_value[0], acl->cfg.num_fields *
+				sizeof(struct rte_acl_field));
+
+			/* Rule found: remove from table */
+			if (status == 0) {
+				pos = i;
+				pos_valid = 1;
+
+				deleted_rule = acl->acl_rule_list[i];
+				acl->acl_rule_list[i] = NULL;
+			}
+		}
+	}
+
+	/* Return if rule not found */
+	if (pos_valid == 0) {
+		*key_found = 0;
+		return 0;
+	}
+
+	/* Build low level ACL table */
+	acl->name_id ^= 1;
+	acl->acl_params.name = acl->name[acl->name_id];
+	status = rte_table_acl_build(acl, &ctx);
+	if (status != 0) {
+		/* Roll back changes */
+		acl->acl_rule_list[pos] = deleted_rule;
+		acl->name_id ^= 1;
+
+		return -EINVAL;
+	}
+
+	/* Commit changes */
+	if (acl->ctx != NULL)
+		rte_acl_free(acl->ctx);
+
+	acl->ctx = ctx;
+	*key_found = 1;
+	if (entry != NULL)
+		memcpy(entry, &acl->memory[pos * acl->entry_size],
+			acl->entry_size);
+
+	return 0;
+}
+
+static int
+rte_table_acl_entry_add_bulk(
+	void *table,
+	void **keys,
+	void **entries,
+	uint32_t n_keys,
+	int *key_found,
+	void **entries_ptr)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+	struct rte_acl_ctx *ctx;
+	uint32_t rule_pos[n_keys];
+	uint32_t i;
+	int err = 0, build = 0;
+	int status;
+
+	/* Check input parameters */
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (keys == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: keys parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (entries == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entries parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (n_keys == 0) {
+		RTE_LOG(ERR, TABLE, "%s: 0 rules to add\n", __func__);
+		return -EINVAL;
+	}
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (entries_ptr == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entries_ptr parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Check input parameters in arrays */
+	for (i = 0; i < n_keys; i++) {
+		struct rte_table_acl_rule_add_params *rule;
+
+		if (keys[i] == NULL) {
+			RTE_LOG(ERR, TABLE, "%s: keys[%" PRIu32 "] parameter is NULL\n",
+					__func__, i);
+			return -EINVAL;
+		}
+
+		if (entries[i] == NULL) {
+			RTE_LOG(ERR, TABLE, "%s: entries[%" PRIu32 "] parameter is NULL\n",
+					__func__, i);
+			return -EINVAL;
+		}
+
+		if (entries_ptr[i] == NULL) {
+			RTE_LOG(ERR, TABLE, "%s: entries_ptr[%" PRIu32 "] parameter is NULL\n",
+					__func__, i);
+			return -EINVAL;
+		}
+
+		rule = (struct rte_table_acl_rule_add_params *) keys[i];
+		if (rule->priority > RTE_ACL_MAX_PRIORITY) {
+			RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__);
+			return -EINVAL;
+		}
+	}
+
+	memset(rule_pos, 0, n_keys * sizeof(uint32_t));
+	memset(key_found, 0, n_keys * sizeof(int));
+	for (i = 0; i < n_keys; i++) {
+		struct rte_table_acl_rule_add_params *rule =
+				(struct rte_table_acl_rule_add_params *) keys[i];
+		struct rte_pipeline_acl_rule acl_rule;
+		struct rte_acl_rule *rule_location;
+		uint32_t free_pos, free_pos_valid, j;
+
+		/* Setup rule data structure */
+		memset(&acl_rule, 0, sizeof(acl_rule));
+		acl_rule.data.category_mask = 1;
+		acl_rule.data.priority = RTE_ACL_MAX_PRIORITY - rule->priority;
+		acl_rule.data.userdata = 0; /* To be set up later */
+		memcpy(&acl_rule.field[0],
+			&rule->field_value[0],
+			acl->cfg.num_fields * sizeof(struct rte_acl_field));
+
+		/* Look to see if the rule exists already in the table */
+		free_pos = 0;
+		free_pos_valid = 0;
+		for (j = 1; j < acl->n_rules; j++) {
+			if (acl->acl_rule_list[j] == NULL) {
+				if (free_pos_valid == 0) {
+					free_pos = j;
+					free_pos_valid = 1;
+				}
+
+				continue;
+			}
+
+			/* Compare the key fields */
+			status = memcmp(&acl->acl_rule_list[j]->field[0],
+				&rule->field_value[0],
+				acl->cfg.num_fields * sizeof(struct rte_acl_field));
+
+			/* Rule found: update data associated with the rule */
+			if (status == 0) {
+				key_found[i] = 1;
+				entries_ptr[i] = &acl->memory[j * acl->entry_size];
+				memcpy(entries_ptr[i], entries[i], acl->entry_size);
+
+				break;
+			}
+		}
+
+		/* Key already in the table */
+		if (key_found[i] != 0)
+			continue;
+
+		/* Maximum number of rules reached */
+		if (free_pos_valid == 0) {
+			err = 1;
+			break;
+		}
+
+		/* Add the new rule to the rule set */
+		acl_rule.data.userdata = free_pos;
+		rule_location = (struct rte_acl_rule *)
+			&acl->acl_rule_memory[free_pos * acl->acl_params.rule_size];
+		memcpy(rule_location, &acl_rule, acl->acl_params.rule_size);
+		acl->acl_rule_list[free_pos] = rule_location;
+		rule_pos[i] = free_pos;
+		build = 1;
+	}
+
+	if (err != 0) {
+		for (i = 0; i < n_keys; i++) {
+			if (rule_pos[i] == 0)
+				continue;
+
+			acl->acl_rule_list[rule_pos[i]] = NULL;
+		}
+
+		return -ENOSPC;
+	}
+
+	if (build == 0)
+		return 0;
+
+	/* Build low level ACL table */
+	acl->name_id ^= 1;
+	acl->acl_params.name = acl->name[acl->name_id];
+	status = rte_table_acl_build(acl, &ctx);
+	if (status != 0) {
+		/* Roll back changes */
+		for (i = 0; i < n_keys; i++) {
+			if (rule_pos[i] == 0)
+				continue;
+
+			acl->acl_rule_list[rule_pos[i]] = NULL;
+		}
+		acl->name_id ^= 1;
+
+		return -EINVAL;
+	}
+
+	/* Commit changes */
+	if (acl->ctx != NULL)
+		rte_acl_free(acl->ctx);
+	acl->ctx = ctx;
+
+	for (i = 0; i < n_keys; i++) {
+		if (rule_pos[i] == 0)
+			continue;
+
+		key_found[i] = 0;
+		entries_ptr[i] = &acl->memory[rule_pos[i] * acl->entry_size];
+		memcpy(entries_ptr[i], entries[i], acl->entry_size);
+	}
+
+	return 0;
+}
+
+static int
+rte_table_acl_entry_delete_bulk(
+	void *table,
+	void **keys,
+	uint32_t n_keys,
+	int *key_found,
+	void **entries)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+	struct rte_acl_rule *deleted_rules[n_keys];
+	uint32_t rule_pos[n_keys];
+	struct rte_acl_ctx *ctx;
+	uint32_t i;
+	int status;
+	int build = 0;
+
+	/* Check input parameters */
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (keys == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (n_keys == 0) {
+		RTE_LOG(ERR, TABLE, "%s: 0 rules to delete\n", __func__);
+		return -EINVAL;
+	}
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < n_keys; i++) {
+		if (keys[i] == NULL) {
+			RTE_LOG(ERR, TABLE, "%s: keys[%" PRIu32 "] parameter is NULL\n",
+					__func__, i);
+			return -EINVAL;
+		}
+	}
+
+	memset(deleted_rules, 0, n_keys * sizeof(struct rte_acl_rule *));
+	memset(rule_pos, 0, n_keys * sizeof(uint32_t));
+	for (i = 0; i < n_keys; i++) {
+		struct rte_table_acl_rule_delete_params *rule =
+			(struct rte_table_acl_rule_delete_params *) keys[i];
+		uint32_t pos_valid, j;
+
+		/* Look for the rule in the table */
+		pos_valid = 0;
+		for (j = 1; j < acl->n_rules; j++) {
+			if (acl->acl_rule_list[j] == NULL)
+				continue;
+
+			/* Compare the key fields */
+			status = memcmp(&acl->acl_rule_list[j]->field[0],
+					&rule->field_value[0],
+					acl->cfg.num_fields * sizeof(struct rte_acl_field));
+
+			/* Rule found: remove from table */
+			if (status == 0) {
+				pos_valid = 1;
+
+				deleted_rules[i] = acl->acl_rule_list[j];
+				acl->acl_rule_list[j] = NULL;
+				rule_pos[i] = j;
+
+				build = 1;
+			}
+		}
+
+		if (pos_valid == 0) {
+			key_found[i] = 0;
+			continue;
+		}
+	}
+
+	/* Return if no changes to acl table */
+	if (build == 0) {
+		return 0;
+	}
+
+	/* Build low level ACL table */
+	acl->name_id ^= 1;
+	acl->acl_params.name = acl->name[acl->name_id];
+	status = rte_table_acl_build(acl, &ctx);
+	if (status != 0) {
+		/* Roll back changes */
+		for (i = 0; i < n_keys; i++) {
+			if (rule_pos[i] == 0)
+				continue;
+
+			acl->acl_rule_list[rule_pos[i]] = deleted_rules[i];
+		}
+
+		acl->name_id ^= 1;
+
+		return -EINVAL;
+	}
+
+	/* Commit changes */
+	if (acl->ctx != NULL)
+		rte_acl_free(acl->ctx);
+
+	acl->ctx = ctx;
+	for (i = 0; i < n_keys; i++) {
+		if (rule_pos[i] == 0)
+			continue;
+
+		key_found[i] = 1;
+		if (entries != NULL && entries[i] != NULL)
+			memcpy(entries[i], &acl->memory[rule_pos[i] * acl->entry_size],
+					acl->entry_size);
+	}
+
+	return 0;
+}
+
+static int
+rte_table_acl_lookup(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+	const uint8_t *pkts_data[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t results[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t pkts_out_mask;
+	uint32_t n_pkts, i, j;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_ACL_STATS_PKTS_IN_ADD(acl, n_pkts_in);
+
+	/* Input conversion */
+	for (i = 0, j = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
+		__builtin_clzll(pkts_mask)); i++) {
+		uint64_t pkt_mask = 1LLU << i;
+
+		if (pkt_mask & pkts_mask) {
+			pkts_data[j] = rte_pktmbuf_mtod(pkts[i], uint8_t *);
+			j++;
+		}
+	}
+	n_pkts = j;
+
+	/* Low-level ACL table lookup */
+	if (acl->ctx != NULL)
+		rte_acl_classify(acl->ctx, pkts_data, results, n_pkts, 1);
+	else
+		n_pkts = 0;
+
+	/* Output conversion */
+	pkts_out_mask = 0;
+	for (i = 0; i < n_pkts; i++) {
+		uint32_t action_table_pos = results[i];
+		uint32_t pkt_pos = __builtin_ctzll(pkts_mask);
+		uint64_t pkt_mask = 1LLU << pkt_pos;
+
+		pkts_mask &= ~pkt_mask;
+
+		if (action_table_pos != RTE_ACL_INVALID_USERDATA) {
+			pkts_out_mask |= pkt_mask;
+			entries[pkt_pos] = (void *)
+				&acl->memory[action_table_pos *
+				acl->entry_size];
+			rte_prefetch0(entries[pkt_pos]);
+		}
+	}
+
+	*lookup_hit_mask = pkts_out_mask;
+	RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(acl, n_pkts_in - __builtin_popcountll(pkts_out_mask));
+
+	return 0;
+}
+
+static int
+rte_table_acl_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_acl *acl = (struct rte_table_acl *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &acl->stats, sizeof(acl->stats));
+
+	if (clear)
+		memset(&acl->stats, 0, sizeof(acl->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_acl_ops = {
+	.f_create = rte_table_acl_create,
+	.f_free = rte_table_acl_free,
+	.f_add = rte_table_acl_entry_add,
+	.f_delete = rte_table_acl_entry_delete,
+	.f_add_bulk = rte_table_acl_entry_add_bulk,
+	.f_delete_bulk = rte_table_acl_entry_delete_bulk,
+	.f_lookup = rte_table_acl_lookup,
+	.f_stats = rte_table_acl_stats_read,
+};
diff --git a/lib/librte_table/rte_table_acl.h b/lib/librte_table/rte_table_acl.h
new file mode 100644
index 00000000..a9cc0328
--- /dev/null
+++ b/lib/librte_table/rte_table_acl.h
@@ -0,0 +1,95 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_ACL_H__
+#define __INCLUDE_RTE_TABLE_ACL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table ACL
+ *
+ * This table uses the Access Control List (ACL) algorithm to uniquely
+ * associate data to lookup keys.
+ *
+ * Use-cases: Firewall rule database, etc.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include "rte_acl.h"
+
+#include "rte_table.h"
+
+/** ACL table parameters */
+struct rte_table_acl_params {
+	/** Name */
+	const char *name;
+
+	/** Maximum number of ACL rules in the table */
+	uint32_t n_rules;
+
+	/** Number of fields in the ACL rule specification */
+	uint32_t n_rule_fields;
+
+	/** Format specification of the fields of the ACL rule */
+	struct rte_acl_field_def field_format[RTE_ACL_MAX_FIELDS];
+};
+
+/** ACL rule specification for entry add operation */
+struct rte_table_acl_rule_add_params {
+	/** ACL rule priority, with 0 as the highest priority */
+	int32_t  priority;
+
+	/** Values for the fields of the ACL rule to be added to the table */
+	struct rte_acl_field field_value[RTE_ACL_MAX_FIELDS];
+};
+
+/** ACL rule specification for entry delete operation */
+struct rte_table_acl_rule_delete_params {
+	/** Values for the fields of the ACL rule to be deleted from table */
+	struct rte_acl_field field_value[RTE_ACL_MAX_FIELDS];
+};
+
+/** ACL table operations */
+extern struct rte_table_ops rte_table_acl_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_array.c b/lib/librte_table/rte_table_array.c
new file mode 100644
index 00000000..3bb68d11
--- /dev/null
+++ b/lib/librte_table/rte_table_array.c
@@ -0,0 +1,237 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_array.h"
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_ARRAY_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_ARRAY_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_table_array {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t entry_size;
+	uint32_t n_entries;
+	uint32_t offset;
+
+	/* Internal fields */
+	uint32_t entry_pos_mask;
+
+	/* Internal table */
+	uint8_t array[0] __rte_cache_aligned;
+} __rte_cache_aligned;
+
+static void *
+rte_table_array_create(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_array_params *p =
+		(struct rte_table_array_params *) params;
+	struct rte_table_array *t;
+	uint32_t total_cl_size, total_size;
+
+	/* Check input parameters */
+	if ((p == NULL) ||
+	    (p->n_entries == 0) ||
+		(!rte_is_power_of_2(p->n_entries)))
+		return NULL;
+
+	/* Memory allocation */
+	total_cl_size = (sizeof(struct rte_table_array) +
+			RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+	total_cl_size += (p->n_entries * entry_size +
+			RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+	total_size = total_cl_size * RTE_CACHE_LINE_SIZE;
+	t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (t == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for array table\n",
+			__func__, total_size);
+		return NULL;
+	}
+
+	/* Memory initialization */
+	t->entry_size = entry_size;
+	t->n_entries = p->n_entries;
+	t->offset = p->offset;
+	t->entry_pos_mask = t->n_entries - 1;
+
+	return t;
+}
+
+static int
+rte_table_array_free(void *table)
+{
+	struct rte_table_array *t = (struct rte_table_array *) table;
+
+	/* Check input parameters */
+	if (t == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Free previously allocated resources */
+	rte_free(t);
+
+	return 0;
+}
+
+static int
+rte_table_array_entry_add(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_array *t = (struct rte_table_array *) table;
+	struct rte_table_array_key *k = (struct rte_table_array_key *) key;
+	uint8_t *table_entry;
+
+	/* Check input parameters */
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (entry == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (entry_ptr == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry_ptr parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	table_entry = &t->array[k->pos * t->entry_size];
+	memcpy(table_entry, entry, t->entry_size);
+	*key_found = 1;
+	*entry_ptr = (void *) table_entry;
+
+	return 0;
+}
+
+static int
+rte_table_array_lookup(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_array *t = (struct rte_table_array *) table;
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(t, n_pkts_in);
+	*lookup_hit_mask = pkts_mask;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++) {
+			struct rte_mbuf *pkt = pkts[i];
+			uint32_t entry_pos = RTE_MBUF_METADATA_UINT32(pkt,
+				t->offset) & t->entry_pos_mask;
+
+			entries[i] = (void *) &t->array[entry_pos *
+				t->entry_size];
+		}
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+			uint32_t entry_pos = RTE_MBUF_METADATA_UINT32(pkt,
+				t->offset) & t->entry_pos_mask;
+
+			entries[pkt_index] = (void *) &t->array[entry_pos *
+				t->entry_size];
+			pkts_mask &= ~pkt_mask;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_table_array_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_array *array = (struct rte_table_array *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &array->stats, sizeof(array->stats));
+
+	if (clear)
+		memset(&array->stats, 0, sizeof(array->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_array_ops = {
+	.f_create = rte_table_array_create,
+	.f_free = rte_table_array_free,
+	.f_add = rte_table_array_entry_add,
+	.f_delete = NULL,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_array_lookup,
+	.f_stats = rte_table_array_stats_read,
+};
diff --git a/lib/librte_table/rte_table_array.h b/lib/librte_table/rte_table_array.h
new file mode 100644
index 00000000..9521119e
--- /dev/null
+++ b/lib/librte_table/rte_table_array.h
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_ARRAY_H__
+#define __INCLUDE_RTE_TABLE_ARRAY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table Array
+ *
+ * Simple array indexing. Lookup key is the array entry index.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include "rte_table.h"
+
+/** Array table parameters */
+struct rte_table_array_params {
+	/** Number of array entries. Has to be a power of two. */
+	uint32_t n_entries;
+
+	/** Byte offset within input packet meta-data where lookup key (i.e. the
+	    array entry index) is located. */
+	uint32_t offset;
+};
+
+/** Array table key format */
+struct rte_table_array_key {
+	/** Array entry index */
+	uint32_t pos;
+};
+
+/** Array table operations */
+extern struct rte_table_ops rte_table_array_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h
new file mode 100644
index 00000000..9d17516a
--- /dev/null
+++ b/lib/librte_table/rte_table_hash.h
@@ -0,0 +1,370 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_HASH_H__
+#define __INCLUDE_RTE_TABLE_HASH_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table Hash
+ *
+ * These tables use the exact match criterion to uniquely associate data to
+ * lookup keys.
+ *
+ * Use-cases: Flow classification table, Address Resolution Protocol (ARP) table
+ *
+ * Hash table types:
+ * 1. Entry add strategy on bucket full:
+ *     a. Least Recently Used (LRU): One of the existing keys in the bucket is
+ *        deleted and the new key is added in its place. The number of keys in
+ *        each bucket never grows bigger than 4. The logic to pick the key to
+ *        be dropped from the bucket is LRU. The hash table lookup operation
+ *        maintains the order in which the keys in the same bucket are hit, so
+ *        every time a key is hit, it becomes the new Most Recently Used (MRU)
+ *        key, i.e. the most unlikely candidate for drop. When a key is added
+ *        to the bucket, it also becomes the new MRU key. When a key needs to
+ *        be picked and dropped, the most likely candidate for drop, i.e. the
+ *        current LRU key, is always picked. The LRU logic requires maintaining
+ *        specific data structures per each bucket.
+ *     b. Extendible bucket (ext): The bucket is extended with space for 4 more
+ *        keys. This is done by allocating additional memory at table init time,
+ *        which is used to create a pool of free keys (the size of this pool is
+ *        configurable and always a multiple of 4). On key add operation, the
+ *        allocation of a group of 4 keys only happens successfully within the
+ *        limit of free keys, otherwise the key add operation fails. On key
+ *        delete operation, a group of 4 keys is freed back to the pool of free
+ *        keys when the key to be deleted is the only key that was used within
+ *        its group of 4 keys at that time. On key lookup operation, if the
+ *        current bucket is in extended state and a match is not found in the
+ *        first group of 4 keys, the search continues beyond the first group of
+ *        4 keys, potentially until all keys in this bucket are examined. The
+ *        extendible bucket logic requires maintaining specific data structures
+ *        per table and per each bucket.
+ * 2. Key signature computation:
+ *     a. Pre-computed key signature: The key lookup operation is split between
+ *        two CPU cores. The first CPU core (typically the CPU core performing
+ *        packet RX) extracts the key from the input packet, computes the key
+ *        signature and saves both the key and the key signature in the packet
+ *        buffer as packet meta-data. The second CPU core reads both the key and
+ *        the key signature from the packet meta-data and performs the bucket
+ *        search step of the key lookup operation.
+ *     b. Key signature computed on lookup (do-sig): The same CPU core reads
+ *        the key from the packet meta-data, uses it to compute the key
+ *        signature and also performs the bucket search step of the key lookup
+ *        operation.
+ * 3. Key size:
+ *     a. Configurable key size
+ *     b. Single key size (8-byte, 16-byte or 32-byte key size)
+ *
+ ***/
+#include <stdint.h>
+
+#include "rte_table.h"
+
+/** Hash function */
+typedef uint64_t (*rte_table_hash_op_hash)(
+	void *key,
+	uint32_t key_size,
+	uint64_t seed);
+
+/**
+ * Hash tables with configurable key size
+ *
+ */
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_ext_params {
+	/** Key size (number of bytes) */
+	uint32_t key_size;
+
+	/** Maximum number of keys */
+	uint32_t n_keys;
+
+	/** Number of hash table buckets. Each bucket stores up to 4 keys. */
+	uint32_t n_buckets;
+
+	/** Number of hash table bucket extensions. Each bucket extension has
+	space for 4 keys and each bucket can have 0, 1 or more extensions. */
+	uint32_t n_buckets_ext;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed value for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+};
+
+/** Extendible bucket hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_ext_ops;
+
+/** Extendible bucket hash table operations for key signature computed on
+	lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_ext_dosig_ops;
+
+/** LRU hash table parameters */
+struct rte_table_hash_lru_params {
+	/** Key size (number of bytes) */
+	uint32_t key_size;
+
+	/** Maximum number of keys */
+	uint32_t n_keys;
+
+	/** Number of hash table buckets. Each bucket stores up to 4 keys. */
+	uint32_t n_buckets;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed value for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_lru_ops;
+
+/** LRU hash table operations for key signature computed on lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_lru_dosig_ops;
+
+/**
+ * 8-byte key hash tables
+ *
+ */
+/** LRU hash table parameters */
+struct rte_table_hash_key8_lru_params {
+	/** Maximum number of entries (and keys) in the table */
+	uint32_t n_entries;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+
+	/** Bit-mask to be AND-ed to the key on lookup */
+	uint8_t *key_mask;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key8_lru_ops;
+
+/** LRU hash table operations for key signature computed on lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key8_lru_dosig_ops;
+
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_key8_ext_params {
+	/** Maximum number of entries (and keys) in the table */
+	uint32_t n_entries;
+
+	/** Number of entries (and keys) for hash table bucket extensions. Each
+		bucket is extended in increments of 4 keys. */
+	uint32_t n_entries_ext;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+
+	/** Bit-mask to be AND-ed to the key on lookup */
+	uint8_t *key_mask;
+};
+
+/** Extendible bucket hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key8_ext_ops;
+
+/** Extendible bucket hash table operations for key signature computed on
+    lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key8_ext_dosig_ops;
+
+/**
+ * 16-byte key hash tables
+ *
+ */
+/** LRU hash table parameters */
+struct rte_table_hash_key16_lru_params {
+	/** Maximum number of entries (and keys) in the table */
+	uint32_t n_entries;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+
+	/** Bit-mask to be AND-ed to the key on lookup */
+	uint8_t *key_mask;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key16_lru_ops;
+
+/** LRU hash table operations for key signature computed on lookup
+    ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops;
+
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_key16_ext_params {
+	/** Maximum number of entries (and keys) in the table */
+	uint32_t n_entries;
+
+	/** Number of entries (and keys) for hash table bucket extensions. Each
+	bucket is extended in increments of 4 keys. */
+	uint32_t n_entries_ext;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+
+	/** Bit-mask to be AND-ed to the key on lookup */
+	uint8_t *key_mask;
+};
+
+/** Extendible bucket operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key16_ext_ops;
+
+/** Extendible bucket hash table operations for key signature computed on
+    lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops;
+
+/**
+ * 32-byte key hash tables
+ *
+ */
+/** LRU hash table parameters */
+struct rte_table_hash_key32_lru_params {
+	/** Maximum number of entries (and keys) in the table */
+	uint32_t n_entries;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key32_lru_ops;
+
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_key32_ext_params {
+	/** Maximum number of entries (and keys) in the table */
+	uint32_t n_entries;
+
+	/** Number of entries (and keys) for hash table bucket extensions. Each
+		bucket is extended in increments of 4 keys. */
+	uint32_t n_entries_ext;
+
+	/** Hash function */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed for the hash function */
+	uint64_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	is located. Valid for pre-computed key signature tables, ignored for
+	do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+};
+
+/** Extendible bucket hash table operations */
+extern struct rte_table_ops rte_table_hash_key32_ext_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_hash_ext.c b/lib/librte_table/rte_table_hash_ext.c
new file mode 100644
index 00000000..e283a3d1
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_ext.c
@@ -0,0 +1,1159 @@
+/*-
+ *	 BSD LICENSE
+ *
+ *	 Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *	 All rights reserved.
+ *
+ *	 Redistribution and use in source and binary forms, with or without
+ *	 modification, are permitted provided that the following conditions
+ *	 are met:
+ *
+ *	* Redistributions of source code must retain the above copyright
+ *		 notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above copyright
+ *		 notice, this list of conditions and the following disclaimer in
+ *		 the documentation and/or other materials provided with the
+ *		 distribution.
+ *	* Neither the name of Intel Corporation nor the names of its
+ *		 contributors may be used to endorse or promote products derived
+ *		 from this software without specific prior written permission.
+ *
+ *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+
+#define KEYS_PER_BUCKET	4
+
+struct bucket {
+	union {
+		uintptr_t next;
+		uint64_t lru_list;
+	};
+	uint16_t sig[KEYS_PER_BUCKET];
+	uint32_t key_pos[KEYS_PER_BUCKET];
+};
+
+#define BUCKET_NEXT(bucket)						\
+	((void *) ((bucket)->next & (~1LU)))
+
+#define BUCKET_NEXT_VALID(bucket)					\
+	((bucket)->next & 1LU)
+
+#define BUCKET_NEXT_SET(bucket, bucket_next)				\
+do									\
+	(bucket)->next = (((uintptr_t) ((void *) (bucket_next))) | 1LU);\
+while (0)
+
+#define BUCKET_NEXT_SET_NULL(bucket)					\
+do									\
+	(bucket)->next = 0;						\
+while (0)
+
+#define BUCKET_NEXT_COPY(bucket, bucket2)				\
+do									\
+	(bucket)->next = (bucket2)->next;				\
+while (0)
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct grinder {
+	struct bucket *bkt;
+	uint64_t sig;
+	uint64_t match;
+	uint32_t key_index;
+};
+
+struct rte_table_hash {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t key_size;
+	uint32_t entry_size;
+	uint32_t n_keys;
+	uint32_t n_buckets;
+	uint32_t n_buckets_ext;
+	rte_table_hash_op_hash f_hash;
+	uint64_t seed;
+	uint32_t signature_offset;
+	uint32_t key_offset;
+
+	/* Internal */
+	uint64_t bucket_mask;
+	uint32_t key_size_shl;
+	uint32_t data_size_shl;
+	uint32_t key_stack_tos;
+	uint32_t bkt_ext_stack_tos;
+
+	/* Grinder */
+	struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	/* Tables */
+	struct bucket *buckets;
+	struct bucket *buckets_ext;
+	uint8_t *key_mem;
+	uint8_t *data_mem;
+	uint32_t *key_stack;
+	uint32_t *bkt_ext_stack;
+
+	/* Table memory */
+	uint8_t memory[0] __rte_cache_aligned;
+};
+
+static int
+check_params_create(struct rte_table_hash_ext_params *params)
+{
+	uint32_t n_buckets_min;
+
+	/* key_size */
+	if ((params->key_size == 0) ||
+		(!rte_is_power_of_2(params->key_size))) {
+		RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_keys */
+	if ((params->n_keys == 0) ||
+		(!rte_is_power_of_2(params->n_keys))) {
+		RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_buckets */
+	n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys;
+	if ((params->n_buckets == 0) ||
+		(!rte_is_power_of_2(params->n_keys)) ||
+		(params->n_buckets < n_buckets_min)) {
+		RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_hash_ext_params *p =
+		(struct rte_table_hash_ext_params *) params;
+	struct rte_table_hash *t;
+	uint32_t total_size, table_meta_sz;
+	uint32_t bucket_sz, bucket_ext_sz, key_sz;
+	uint32_t key_stack_sz, bkt_ext_stack_sz, data_sz;
+	uint32_t bucket_offset, bucket_ext_offset, key_offset;
+	uint32_t key_stack_offset, bkt_ext_stack_offset, data_offset;
+	uint32_t i;
+
+	/* Check input parameters */
+	if ((check_params_create(p) != 0) ||
+		(!rte_is_power_of_2(entry_size)) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		(sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2)))
+		return NULL;
+
+	/* Memory allocation */
+	table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
+	bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
+	bucket_ext_sz =
+		RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(struct bucket));
+	key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
+	key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
+	bkt_ext_stack_sz =
+		RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(uint32_t));
+	data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
+	total_size = table_meta_sz + bucket_sz + bucket_ext_sz + key_sz +
+		key_stack_sz + bkt_ext_stack_sz + data_sz;
+
+	t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (t == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is "
+		"%u bytes\n", __func__, p->key_size, total_size);
+
+	/* Memory initialization */
+	t->key_size = p->key_size;
+	t->entry_size = entry_size;
+	t->n_keys = p->n_keys;
+	t->n_buckets = p->n_buckets;
+	t->n_buckets_ext = p->n_buckets_ext;
+	t->f_hash = p->f_hash;
+	t->seed = p->seed;
+	t->signature_offset = p->signature_offset;
+	t->key_offset = p->key_offset;
+
+	/* Internal */
+	t->bucket_mask = t->n_buckets - 1;
+	t->key_size_shl = __builtin_ctzl(p->key_size);
+	t->data_size_shl = __builtin_ctzl(entry_size);
+
+	/* Tables */
+	bucket_offset = 0;
+	bucket_ext_offset = bucket_offset + bucket_sz;
+	key_offset = bucket_ext_offset + bucket_ext_sz;
+	key_stack_offset = key_offset + key_sz;
+	bkt_ext_stack_offset = key_stack_offset + key_stack_sz;
+	data_offset = bkt_ext_stack_offset + bkt_ext_stack_sz;
+
+	t->buckets = (struct bucket *) &t->memory[bucket_offset];
+	t->buckets_ext = (struct bucket *) &t->memory[bucket_ext_offset];
+	t->key_mem = &t->memory[key_offset];
+	t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
+	t->bkt_ext_stack = (uint32_t *) &t->memory[bkt_ext_stack_offset];
+	t->data_mem = &t->memory[data_offset];
+
+	/* Key stack */
+	for (i = 0; i < t->n_keys; i++)
+		t->key_stack[i] = t->n_keys - 1 - i;
+	t->key_stack_tos = t->n_keys;
+
+	/* Bucket ext stack */
+	for (i = 0; i < t->n_buckets_ext; i++)
+		t->bkt_ext_stack[i] = t->n_buckets_ext - 1 - i;
+	t->bkt_ext_stack_tos = t->n_buckets_ext;
+
+	return t;
+}
+
+static int
+rte_table_hash_ext_free(void *table)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (t == NULL)
+		return -EINVAL;
+
+	rte_free(t);
+	return 0;
+}
+
+static int
+rte_table_hash_ext_entry_add(void *table, void *key, void *entry,
+	int *key_found, void **entry_ptr)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct bucket *bkt0, *bkt, *bkt_prev;
+	uint64_t sig;
+	uint32_t bkt_index, i;
+
+	sig = t->f_hash(key, t->key_size, t->seed);
+	bkt_index = sig & t->bucket_mask;
+	bkt0 = &t->buckets[bkt_index];
+	sig = (sig >> 16) | 1LLU;
+
+	/* Key is present in the bucket */
+	for (bkt = bkt0; bkt != NULL; bkt = BUCKET_NEXT(bkt))
+		for (i = 0; i < KEYS_PER_BUCKET; i++) {
+			uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+			uint32_t bkt_key_index = bkt->key_pos[i];
+			uint8_t *bkt_key =
+				&t->key_mem[bkt_key_index << t->key_size_shl];
+
+			if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+				t->key_size) == 0)) {
+				uint8_t *data = &t->data_mem[bkt_key_index <<
+					t->data_size_shl];
+
+				memcpy(data, entry, t->entry_size);
+				*key_found = 1;
+				*entry_ptr = (void *) data;
+				return 0;
+			}
+		}
+
+	/* Key is not present in the bucket */
+	for (bkt_prev = NULL, bkt = bkt0; bkt != NULL; bkt_prev = bkt,
+		bkt = BUCKET_NEXT(bkt))
+		for (i = 0; i < KEYS_PER_BUCKET; i++) {
+			uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+
+			if (bkt_sig == 0) {
+				uint32_t bkt_key_index;
+				uint8_t *bkt_key, *data;
+
+				/* Allocate new key */
+				if (t->key_stack_tos == 0) /* No free keys */
+					return -ENOSPC;
+
+				bkt_key_index = t->key_stack[
+					--t->key_stack_tos];
+
+				/* Install new key */
+				bkt_key = &t->key_mem[bkt_key_index <<
+					t->key_size_shl];
+				data = &t->data_mem[bkt_key_index <<
+					t->data_size_shl];
+
+				bkt->sig[i] = (uint16_t) sig;
+				bkt->key_pos[i] = bkt_key_index;
+				memcpy(bkt_key, key, t->key_size);
+				memcpy(data, entry, t->entry_size);
+
+				*key_found = 0;
+				*entry_ptr = (void *) data;
+				return 0;
+			}
+		}
+
+	/* Bucket full: extend bucket */
+	if ((t->bkt_ext_stack_tos > 0) && (t->key_stack_tos > 0)) {
+		uint32_t bkt_key_index;
+		uint8_t *bkt_key, *data;
+
+		/* Allocate new bucket ext */
+		bkt_index = t->bkt_ext_stack[--t->bkt_ext_stack_tos];
+		bkt = &t->buckets_ext[bkt_index];
+
+		/* Chain the new bucket ext */
+		BUCKET_NEXT_SET(bkt_prev, bkt);
+		BUCKET_NEXT_SET_NULL(bkt);
+
+		/* Allocate new key */
+		bkt_key_index = t->key_stack[--t->key_stack_tos];
+		bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl];
+
+		data = &t->data_mem[bkt_key_index << t->data_size_shl];
+
+		/* Install new key into bucket */
+		bkt->sig[0] = (uint16_t) sig;
+		bkt->key_pos[0] = bkt_key_index;
+		memcpy(bkt_key, key, t->key_size);
+		memcpy(data, entry, t->entry_size);
+
+		*key_found = 0;
+		*entry_ptr = (void *) data;
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int
+rte_table_hash_ext_entry_delete(void *table, void *key, int *key_found,
+void *entry)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct bucket *bkt0, *bkt, *bkt_prev;
+	uint64_t sig;
+	uint32_t bkt_index, i;
+
+	sig = t->f_hash(key, t->key_size, t->seed);
+	bkt_index = sig & t->bucket_mask;
+	bkt0 = &t->buckets[bkt_index];
+	sig = (sig >> 16) | 1LLU;
+
+	/* Key is present in the bucket */
+	for (bkt_prev = NULL, bkt = bkt0; bkt != NULL; bkt_prev = bkt,
+		bkt = BUCKET_NEXT(bkt))
+		for (i = 0; i < KEYS_PER_BUCKET; i++) {
+			uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+			uint32_t bkt_key_index = bkt->key_pos[i];
+			uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+				t->key_size_shl];
+
+			if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+				t->key_size) == 0)) {
+				uint8_t *data = &t->data_mem[bkt_key_index <<
+					t->data_size_shl];
+
+				/* Uninstall key from bucket */
+				bkt->sig[i] = 0;
+				*key_found = 1;
+				if (entry)
+					memcpy(entry, data, t->entry_size);
+
+				/* Free key */
+				t->key_stack[t->key_stack_tos++] =
+					bkt_key_index;
+
+				/*Check if bucket is unused */
+				if ((bkt_prev != NULL) &&
+				    (bkt->sig[0] == 0) && (bkt->sig[1] == 0) &&
+				    (bkt->sig[2] == 0) && (bkt->sig[3] == 0)) {
+					/* Unchain bucket */
+					BUCKET_NEXT_COPY(bkt_prev, bkt);
+
+					/* Clear bucket */
+					memset(bkt, 0, sizeof(struct bucket));
+
+					/* Free bucket back to buckets ext */
+					bkt_index = bkt - t->buckets_ext;
+					t->bkt_ext_stack[t->bkt_ext_stack_tos++]
+						= bkt_index;
+				}
+
+				return 0;
+			}
+		}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+static int rte_table_hash_ext_lookup_unoptimized(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries,
+	int dosig)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
+	for ( ; pkts_mask; ) {
+		struct bucket *bkt0, *bkt;
+		struct rte_mbuf *pkt;
+		uint8_t *key;
+		uint64_t pkt_mask, sig;
+		uint32_t pkt_index, bkt_index, i;
+
+		pkt_index = __builtin_ctzll(pkts_mask);
+		pkt_mask = 1LLU << pkt_index;
+		pkts_mask &= ~pkt_mask;
+
+		pkt = pkts[pkt_index];
+		key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
+		if (dosig)
+			sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
+		else
+			sig = RTE_MBUF_METADATA_UINT32(pkt,
+				t->signature_offset);
+
+		bkt_index = sig & t->bucket_mask;
+		bkt0 = &t->buckets[bkt_index];
+		sig = (sig >> 16) | 1LLU;
+
+		/* Key is present in the bucket */
+		for (bkt = bkt0; bkt != NULL; bkt = BUCKET_NEXT(bkt))
+			for (i = 0; i < KEYS_PER_BUCKET; i++) {
+				uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+				uint32_t bkt_key_index = bkt->key_pos[i];
+				uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+					t->key_size_shl];
+
+				if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+					t->key_size) == 0)) {
+					uint8_t *data = &t->data_mem[
+					bkt_key_index << t->data_size_shl];
+
+					pkts_mask_out |= pkt_mask;
+					entries[pkt_index] = (void *) data;
+					break;
+				}
+			}
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+}
+
+/***
+ *
+ * mask = match bitmask
+ * match = at least one match
+ * match_many = more than one match
+ * match_pos = position of first match
+ *
+ *----------------------------------------
+ * mask		 match	 match_many	  match_pos
+ *----------------------------------------
+ * 0000		 0		 0			  00
+ * 0001		 1		 0			  00
+ * 0010		 1		 0			  01
+ * 0011		 1		 1			  00
+ *----------------------------------------
+ * 0100		 1		 0			  10
+ * 0101		 1		 1			  00
+ * 0110		 1		 1			  01
+ * 0111		 1		 1			  00
+ *----------------------------------------
+ * 1000		 1		 0			  11
+ * 1001		 1		 1			  00
+ * 1010		 1		 1			  01
+ * 1011		 1		 1			  00
+ *----------------------------------------
+ * 1100		 1		 1			  10
+ * 1101		 1		 1			  00
+ * 1110		 1		 1			  01
+ * 1111		 1		 1			  00
+ *----------------------------------------
+ *
+ * match = 1111_1111_1111_1110
+ * match_many = 1111_1110_1110_1000
+ * match_pos = 0001_0010_0001_0011__0001_0010_0001_0000
+ *
+ * match = 0xFFFELLU
+ * match_many = 0xFEE8LLU
+ * match_pos = 0x12131210LLU
+ *
+ ***/
+
+#define LUT_MATCH						0xFFFELLU
+#define LUT_MATCH_MANY						0xFEE8LLU
+#define LUT_MATCH_POS						0x12131210LLU
+
+#define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)	\
+{									\
+	uint64_t bucket_sig[4], mask[4], mask_all;			\
+									\
+	bucket_sig[0] = bucket->sig[0];					\
+	bucket_sig[1] = bucket->sig[1];					\
+	bucket_sig[2] = bucket->sig[2];					\
+	bucket_sig[3] = bucket->sig[3];					\
+									\
+	bucket_sig[0] ^= mbuf_sig;					\
+	bucket_sig[1] ^= mbuf_sig;					\
+	bucket_sig[2] ^= mbuf_sig;					\
+	bucket_sig[3] ^= mbuf_sig;					\
+									\
+	mask[0] = 0;							\
+	mask[1] = 0;							\
+	mask[2] = 0;							\
+	mask[3] = 0;							\
+									\
+	if (bucket_sig[0] == 0)						\
+		mask[0] = 1;						\
+	if (bucket_sig[1] == 0)						\
+		mask[1] = 2;						\
+	if (bucket_sig[2] == 0)						\
+		mask[2] = 4;						\
+	if (bucket_sig[3] == 0)						\
+		mask[3] = 8;						\
+									\
+	mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]);		\
+									\
+	match = (LUT_MATCH >> mask_all) & 1;				\
+	match_many = (LUT_MATCH_MANY >> mask_all) & 1;			\
+	match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3;		\
+}
+
+#define lookup_cmp_key(mbuf, key, match_key, f)				\
+{									\
+	uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\
+	uint64_t *bkt_key = (uint64_t *) key;				\
+									\
+	switch (f->key_size) {						\
+	case 8:								\
+	{								\
+		uint64_t xor = pkt_key[0] ^ bkt_key[0];			\
+		match_key = 0;						\
+		if (xor == 0)						\
+			match_key = 1;					\
+	}								\
+	break;								\
+									\
+	case 16:							\
+	{								\
+		uint64_t xor[2], or;					\
+									\
+		xor[0] = pkt_key[0] ^ bkt_key[0];			\
+		xor[1] = pkt_key[1] ^ bkt_key[1];			\
+		or = xor[0] | xor[1];					\
+		match_key = 0;						\
+		if (or == 0)						\
+			match_key = 1;					\
+	}								\
+	break;								\
+									\
+	case 32:							\
+	{								\
+		uint64_t xor[4], or;					\
+									\
+		xor[0] = pkt_key[0] ^ bkt_key[0];			\
+		xor[1] = pkt_key[1] ^ bkt_key[1];			\
+		xor[2] = pkt_key[2] ^ bkt_key[2];			\
+		xor[3] = pkt_key[3] ^ bkt_key[3];			\
+		or = xor[0] | xor[1] | xor[2] | xor[3];			\
+		match_key = 0;						\
+		if (or == 0)						\
+			match_key = 1;					\
+	}								\
+	break;								\
+									\
+	case 64:							\
+	{								\
+		uint64_t xor[8], or;					\
+									\
+		xor[0] = pkt_key[0] ^ bkt_key[0];			\
+		xor[1] = pkt_key[1] ^ bkt_key[1];			\
+		xor[2] = pkt_key[2] ^ bkt_key[2];			\
+		xor[3] = pkt_key[3] ^ bkt_key[3];			\
+		xor[4] = pkt_key[4] ^ bkt_key[4];			\
+		xor[5] = pkt_key[5] ^ bkt_key[5];			\
+		xor[6] = pkt_key[6] ^ bkt_key[6];			\
+		xor[7] = pkt_key[7] ^ bkt_key[7];			\
+		or = xor[0] | xor[1] | xor[2] | xor[3] |		\
+			xor[4] | xor[5] | xor[6] | xor[7];		\
+		match_key = 0;						\
+		if (or == 0)						\
+			match_key = 1;					\
+	}								\
+	break;								\
+									\
+	default:							\
+		match_key = 0;						\
+		if (memcmp(pkt_key, bkt_key, f->key_size) == 0)		\
+			match_key = 1;					\
+	}								\
+}
+
+#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)	\
+{									\
+	uint64_t pkt00_mask, pkt01_mask;				\
+	struct rte_mbuf *mbuf00, *mbuf01;				\
+	uint32_t key_offset = t->key_offset;			\
+									\
+	pkt00_index = __builtin_ctzll(pkts_mask);			\
+	pkt00_mask = 1LLU << pkt00_index;				\
+	pkts_mask &= ~pkt00_mask;					\
+	mbuf00 = pkts[pkt00_index];					\
+									\
+	pkt01_index = __builtin_ctzll(pkts_mask);			\
+	pkt01_mask = 1LLU << pkt01_index;				\
+	pkts_mask &= ~pkt01_mask;					\
+	mbuf01 = pkts[pkt01_index];					\
+									\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
+	pkt01_index)							\
+{									\
+	uint64_t pkt00_mask, pkt01_mask;				\
+	struct rte_mbuf *mbuf00, *mbuf01;				\
+	uint32_t key_offset = t->key_offset;			\
+									\
+	pkt00_index = __builtin_ctzll(pkts_mask);			\
+	pkt00_mask = 1LLU << pkt00_index;				\
+	pkts_mask &= ~pkt00_mask;					\
+	mbuf00 = pkts[pkt00_index];					\
+									\
+	pkt01_index = __builtin_ctzll(pkts_mask);			\
+	if (pkts_mask == 0)						\
+		pkt01_index = pkt00_index;				\
+	pkt01_mask = 1LLU << pkt01_index;				\
+	pkts_mask &= ~pkt01_mask;					\
+	mbuf01 = pkts[pkt01_index];					\
+									\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)		\
+{									\
+	struct grinder *g10, *g11;					\
+	uint64_t sig10, sig11, bkt10_index, bkt11_index;		\
+	struct rte_mbuf *mbuf10, *mbuf11;				\
+	struct bucket *bkt10, *bkt11, *buckets = t->buckets;		\
+	uint64_t bucket_mask = t->bucket_mask;				\
+	uint32_t signature_offset = t->signature_offset;		\
+									\
+	mbuf10 = pkts[pkt10_index];					\
+	sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\
+	bkt10_index = sig10 & bucket_mask;				\
+	bkt10 = &buckets[bkt10_index];					\
+									\
+	mbuf11 = pkts[pkt11_index];					\
+	sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\
+	bkt11_index = sig11 & bucket_mask;				\
+	bkt11 = &buckets[bkt11_index];					\
+									\
+	rte_prefetch0(bkt10);						\
+	rte_prefetch0(bkt11);						\
+									\
+	g10 = &g[pkt10_index];						\
+	g10->sig = sig10;						\
+	g10->bkt = bkt10;						\
+									\
+	g11 = &g[pkt11_index];						\
+	g11->sig = sig11;						\
+	g11->bkt = bkt11;						\
+}
+
+#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)	\
+{									\
+	struct grinder *g10, *g11;					\
+	uint64_t sig10, sig11, bkt10_index, bkt11_index;		\
+	struct rte_mbuf *mbuf10, *mbuf11;				\
+	struct bucket *bkt10, *bkt11, *buckets = t->buckets;		\
+	uint8_t *key10, *key11;						\
+	uint64_t bucket_mask = t->bucket_mask;				\
+	rte_table_hash_op_hash f_hash = t->f_hash;			\
+	uint64_t seed = t->seed;					\
+	uint32_t key_size = t->key_size;				\
+	uint32_t key_offset = t->key_offset;				\
+									\
+	mbuf10 = pkts[pkt10_index];					\
+	key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);	\
+	sig10 = (uint64_t) f_hash(key10, key_size, seed);		\
+	bkt10_index = sig10 & bucket_mask;				\
+	bkt10 = &buckets[bkt10_index];					\
+									\
+	mbuf11 = pkts[pkt11_index];					\
+	key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);	\
+	sig11 = (uint64_t) f_hash(key11, key_size, seed);		\
+	bkt11_index = sig11 & bucket_mask;				\
+	bkt11 = &buckets[bkt11_index];					\
+									\
+	rte_prefetch0(bkt10);						\
+	rte_prefetch0(bkt11);						\
+									\
+	g10 = &g[pkt10_index];						\
+	g10->sig = sig10;						\
+	g10->bkt = bkt10;						\
+									\
+	g11 = &g[pkt11_index];						\
+	g11->sig = sig11;						\
+	g11->bkt = bkt11;						\
+}
+
+#define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)\
+{									\
+	struct grinder *g20, *g21;					\
+	uint64_t sig20, sig21;						\
+	struct bucket *bkt20, *bkt21;					\
+	uint8_t *key20, *key21, *key_mem = t->key_mem;			\
+	uint64_t match20, match21, match_many20, match_many21;		\
+	uint64_t match_pos20, match_pos21;				\
+	uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;\
+									\
+	g20 = &g[pkt20_index];						\
+	sig20 = g20->sig;						\
+	bkt20 = g20->bkt;						\
+	sig20 = (sig20 >> 16) | 1LLU;					\
+	lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);\
+	match20 <<= pkt20_index;					\
+	match_many20 |= BUCKET_NEXT_VALID(bkt20);			\
+	match_many20 <<= pkt20_index;					\
+	key20_index = bkt20->key_pos[match_pos20];			\
+	key20 = &key_mem[key20_index << key_size_shl];			\
+									\
+	g21 = &g[pkt21_index];						\
+	sig21 = g21->sig;						\
+	bkt21 = g21->bkt;						\
+	sig21 = (sig21 >> 16) | 1LLU;					\
+	lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);\
+	match21 <<= pkt21_index;					\
+	match_many21 |= BUCKET_NEXT_VALID(bkt21);			\
+	match_many21 <<= pkt21_index;					\
+	key21_index = bkt21->key_pos[match_pos21];			\
+	key21 = &key_mem[key21_index << key_size_shl];			\
+									\
+	rte_prefetch0(key20);						\
+	rte_prefetch0(key21);						\
+									\
+	pkts_mask_match_many |= match_many20 | match_many21;		\
+									\
+	g20->match = match20;						\
+	g20->key_index = key20_index;					\
+									\
+	g21->match = match21;						\
+	g21->key_index = key21_index;					\
+}
+
+#define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, \
+	entries)							\
+{									\
+	struct grinder *g30, *g31;					\
+	struct rte_mbuf *mbuf30, *mbuf31;				\
+	uint8_t *key30, *key31, *key_mem = t->key_mem;			\
+	uint8_t *data30, *data31, *data_mem = t->data_mem;		\
+	uint64_t match30, match31, match_key30, match_key31, match_keys;\
+	uint32_t key30_index, key31_index;				\
+	uint32_t key_size_shl = t->key_size_shl;			\
+	uint32_t data_size_shl = t->data_size_shl;			\
+									\
+	mbuf30 = pkts[pkt30_index];					\
+	g30 = &g[pkt30_index];						\
+	match30 = g30->match;						\
+	key30_index = g30->key_index;					\
+	key30 = &key_mem[key30_index << key_size_shl];			\
+	lookup_cmp_key(mbuf30, key30, match_key30, t);			\
+	match_key30 <<= pkt30_index;					\
+	match_key30 &= match30;						\
+	data30 = &data_mem[key30_index << data_size_shl];		\
+	entries[pkt30_index] = data30;					\
+									\
+	mbuf31 = pkts[pkt31_index];					\
+	g31 = &g[pkt31_index];						\
+	match31 = g31->match;						\
+	key31_index = g31->key_index;					\
+	key31 = &key_mem[key31_index << key_size_shl];			\
+	lookup_cmp_key(mbuf31, key31, match_key31, t);			\
+	match_key31 <<= pkt31_index;					\
+	match_key31 &= match31;						\
+	data31 = &data_mem[key31_index << data_size_shl];		\
+	entries[pkt31_index] = data31;					\
+									\
+	rte_prefetch0(data30);						\
+	rte_prefetch0(data31);						\
+									\
+	match_keys = match_key30 | match_key31;				\
+	pkts_mask_out |= match_keys;					\
+}
+
+/***
+* The lookup function implements a 4-stage pipeline, with each stage processing
+* two different packets. The purpose of pipelined implementation is to hide the
+* latency of prefetching the data structures and loosen the data dependency
+* between instructions.
+*
+*  p00  _______   p10  _______   p20  _______   p30  _______
+*----->|       |----->|       |----->|       |----->|       |----->
+*      |   0   |      |   1   |      |   2   |      |   3   |
+*----->|_______|----->|_______|----->|_______|----->|_______|----->
+*  p01            p11            p21            p31
+*
+* The naming convention is:
+*    pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1
+*
+***/
+static int rte_table_hash_ext_lookup(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct grinder *g = t->grinders;
+	uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
+	uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+	uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+	int status = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 7 packets */
+	if (__builtin_popcountll(pkts_mask) < 7)
+		return rte_table_hash_ext_lookup_unoptimized(table, pkts,
+			pkts_mask, lookup_hit_mask, entries, 0);
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline feed */
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline feed */
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/*
+	* Pipeline run
+	*
+	*/
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		pkt30_index = pkt20_index;
+		pkt31_index = pkt21_index;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
+			pkt00_index, pkt01_index);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2(t, g, pkt20_index, pkt21_index,
+			pkts_mask_match_many);
+
+		/* Pipeline stage 3 */
+		lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
+			pkts_mask_out, entries);
+	}
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Slow path */
+	pkts_mask_match_many &= ~pkts_mask_out;
+	if (pkts_mask_match_many) {
+		uint64_t pkts_mask_out_slow = 0;
+
+		status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
+			pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
+		pkts_mask_out |= pkts_mask_out_slow;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return status;
+}
+
+static int rte_table_hash_ext_lookup_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct grinder *g = t->grinders;
+	uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
+	uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+	uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+	int status = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 7 packets */
+	if (__builtin_popcountll(pkts_mask) < 7)
+		return rte_table_hash_ext_lookup_unoptimized(table, pkts,
+			pkts_mask, lookup_hit_mask, entries, 1);
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline feed */
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline feed */
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/*
+	* Pipeline run
+	*
+	*/
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		pkt30_index = pkt20_index;
+		pkt31_index = pkt21_index;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
+			pkt00_index, pkt01_index);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2(t, g, pkt20_index, pkt21_index,
+			pkts_mask_match_many);
+
+		/* Pipeline stage 3 */
+		lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
+			pkts_mask_out, entries);
+	}
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Slow path */
+	pkts_mask_match_many &= ~pkts_mask_out;
+	if (pkts_mask_match_many) {
+		uint64_t pkts_mask_out_slow = 0;
+
+		status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
+			pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
+		pkts_mask_out |= pkts_mask_out_slow;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return status;
+}
+
+static int
+rte_table_hash_ext_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_hash_ext_ops	 = {
+	.f_create = rte_table_hash_ext_create,
+	.f_free = rte_table_hash_ext_free,
+	.f_add = rte_table_hash_ext_entry_add,
+	.f_delete = rte_table_hash_ext_entry_delete,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_ext_lookup,
+	.f_stats = rte_table_hash_ext_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_ext_dosig_ops  = {
+	.f_create = rte_table_hash_ext_create,
+	.f_free = rte_table_hash_ext_free,
+	.f_add = rte_table_hash_ext_entry_add,
+	.f_delete = rte_table_hash_ext_entry_delete,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_ext_lookup_dosig,
+	.f_stats = rte_table_hash_ext_stats_read,
+};
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
new file mode 100644
index 00000000..b7e000fd
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -0,0 +1,1515 @@
+/*-
+ *	 BSD LICENSE
+ *
+ *	 Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *	 All rights reserved.
+ *
+ *	 Redistribution and use in source and binary forms, with or without
+ *	 modification, are permitted provided that the following conditions
+ *	 are met:
+ *
+ *	* Redistributions of source code must retain the above copyright
+ *		 notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above copyright
+ *		 notice, this list of conditions and the following disclaimer in
+ *		 the documentation and/or other materials provided with the
+ *		 distribution.
+ *	* Neither the name of Intel Corporation nor the names of its
+ *		 contributors may be used to endorse or promote products derived
+ *		 from this software without specific prior written permission.
+ *
+ *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+#define RTE_TABLE_HASH_KEY_SIZE						16
+
+#define RTE_BUCKET_ENTRY_VALID						0x1LLU
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_bucket_4_16 {
+	/* Cache line 0 */
+	uint64_t signature[4 + 1];
+	uint64_t lru_list;
+	struct rte_bucket_4_16 *next;
+	uint64_t next_valid;
+
+	/* Cache line 1 */
+	uint64_t key[4][2];
+
+	/* Cache line 2 */
+	uint8_t data[0];
+};
+
+struct rte_table_hash {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t n_buckets;
+	uint32_t n_entries_per_bucket;
+	uint32_t key_size;
+	uint32_t entry_size;
+	uint32_t bucket_size;
+	uint32_t signature_offset;
+	uint32_t key_offset;
+	uint64_t key_mask[2];
+	rte_table_hash_op_hash f_hash;
+	uint64_t seed;
+
+	/* Extendible buckets */
+	uint32_t n_buckets_ext;
+	uint32_t stack_pos;
+	uint32_t *stack;
+
+	/* Lookup table */
+	uint8_t memory[0] __rte_cache_aligned;
+};
+
+static int
+check_params_create_lru(struct rte_table_hash_key16_lru_params *params) {
+	/* n_entries */
+	if (params->n_entries == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: f_hash function pointer is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_create_key16_lru(void *params,
+		int socket_id,
+		uint32_t entry_size)
+{
+	struct rte_table_hash_key16_lru_params *p =
+			(struct rte_table_hash_key16_lru_params *) params;
+	struct rte_table_hash *f;
+	uint32_t n_buckets, n_entries_per_bucket,
+			key_size, bucket_size_cl, total_size, i;
+
+	/* Check input parameters */
+	if ((check_params_create_lru(p) != 0) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+		return NULL;
+	n_entries_per_bucket = 4;
+	key_size = 16;
+
+	/* Memory allocation */
+	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
+		n_entries_per_bucket);
+	bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
+		* entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
+	total_size = sizeof(struct rte_table_hash) + n_buckets *
+		bucket_size_cl * RTE_CACHE_LINE_SIZE;
+
+	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE,
+		"%s: Cannot allocate %u bytes for hash table\n",
+		__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE,
+		"%s: Hash table memory footprint is %u bytes\n",
+		__func__, total_size);
+
+	/* Memory initialization */
+	f->n_buckets = n_buckets;
+	f->n_entries_per_bucket = n_entries_per_bucket;
+	f->key_size = key_size;
+	f->entry_size = entry_size;
+	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
+	f->signature_offset = p->signature_offset;
+	f->key_offset = p->key_offset;
+	f->f_hash = p->f_hash;
+	f->seed = p->seed;
+
+	if (p->key_mask != NULL) {
+		f->key_mask[0] = ((uint64_t *)p->key_mask)[0];
+		f->key_mask[1] = ((uint64_t *)p->key_mask)[1];
+	} else {
+		f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
+		f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
+	}
+
+	for (i = 0; i < n_buckets; i++) {
+		struct rte_bucket_4_16 *bucket;
+
+		bucket = (struct rte_bucket_4_16 *) &f->memory[i *
+			f->bucket_size];
+		lru_init(bucket);
+	}
+
+	return f;
+}
+
+static int
+rte_table_hash_free_key16_lru(void *table)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(f);
+	return 0;
+}
+
+static int
+rte_table_hash_entry_add_key16_lru(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket;
+	uint64_t signature, pos;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket = (struct rte_bucket_4_16 *)
+			&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (i = 0; i < 4; i++) {
+		uint64_t bucket_signature = bucket->signature[i];
+		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+		if ((bucket_signature == signature) &&
+				(memcmp(key, bucket_key, f->key_size) == 0)) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			memcpy(bucket_data, entry, f->entry_size);
+			lru_update(bucket, i);
+			*key_found = 1;
+			*entry_ptr = (void *) bucket_data;
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	for (i = 0; i < 4; i++) {
+		uint64_t bucket_signature = bucket->signature[i];
+		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+		if (bucket_signature == 0) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			bucket->signature[i] = signature;
+			memcpy(bucket_key, key, f->key_size);
+			memcpy(bucket_data, entry, f->entry_size);
+			lru_update(bucket, i);
+			*key_found = 0;
+			*entry_ptr = (void *) bucket_data;
+
+			return 0;
+		}
+	}
+
+	/* Bucket full: replace LRU entry */
+	pos = lru_pos(bucket);
+	bucket->signature[pos] = signature;
+	memcpy(bucket->key[pos], key, f->key_size);
+	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
+	lru_update(bucket, pos);
+	*key_found = 0;
+	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];
+
+	return 0;
+}
+
+static int
+rte_table_hash_entry_delete_key16_lru(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket = (struct rte_bucket_4_16 *)
+			&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (i = 0; i < 4; i++) {
+		uint64_t bucket_signature = bucket->signature[i];
+		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+		if ((bucket_signature == signature) &&
+				(memcmp(key, bucket_key, f->key_size) == 0)) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			bucket->signature[i] = 0;
+			*key_found = 1;
+			if (entry)
+				memcpy(entry, bucket_data, f->entry_size);
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+static int
+check_params_create_ext(struct rte_table_hash_key16_ext_params *params) {
+	/* n_entries */
+	if (params->n_entries == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_entries_ext */
+	if (params->n_entries_ext == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: f_hash function pointer is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_create_key16_ext(void *params,
+		int socket_id,
+		uint32_t entry_size)
+{
+	struct rte_table_hash_key16_ext_params *p =
+			(struct rte_table_hash_key16_ext_params *) params;
+	struct rte_table_hash *f;
+	uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
+			bucket_size_cl, stack_size_cl, total_size, i;
+
+	/* Check input parameters */
+	if ((check_params_create_ext(p) != 0) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+		return NULL;
+
+	n_entries_per_bucket = 4;
+	key_size = 16;
+
+	/* Memory allocation */
+	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
+		n_entries_per_bucket);
+	n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
+		n_entries_per_bucket;
+	bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
+		* entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
+	stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
+		/ RTE_CACHE_LINE_SIZE;
+	total_size = sizeof(struct rte_table_hash) +
+		((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
+		RTE_CACHE_LINE_SIZE;
+
+	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE,
+		"%s: Hash table memory footprint is %u bytes\n",
+		__func__, total_size);
+
+	/* Memory initialization */
+	f->n_buckets = n_buckets;
+	f->n_entries_per_bucket = n_entries_per_bucket;
+	f->key_size = key_size;
+	f->entry_size = entry_size;
+	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
+	f->signature_offset = p->signature_offset;
+	f->key_offset = p->key_offset;
+	f->f_hash = p->f_hash;
+	f->seed = p->seed;
+
+	f->n_buckets_ext = n_buckets_ext;
+	f->stack_pos = n_buckets_ext;
+	f->stack = (uint32_t *)
+		&f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+
+	for (i = 0; i < n_buckets_ext; i++)
+		f->stack[i] = i;
+
+	if (p->key_mask != NULL) {
+		f->key_mask[0] = (((uint64_t *)p->key_mask)[0]);
+		f->key_mask[1] = (((uint64_t *)p->key_mask)[1]);
+	} else {
+		f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
+		f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
+	}
+
+	return f;
+}
+
+static int
+rte_table_hash_free_key16_ext(void *table)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(f);
+	return 0;
+}
+
+static int
+rte_table_hash_entry_add_key16_ext(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket0 = (struct rte_bucket_4_16 *)
+			&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
+		for (i = 0; i < 4; i++) {
+			uint64_t bucket_signature = bucket->signature[i];
+			uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+			if ((bucket_signature == signature) &&
+				(memcmp(key, bucket_key, f->key_size) == 0)) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				memcpy(bucket_data, entry, f->entry_size);
+				*key_found = 1;
+				*entry_ptr = (void *) bucket_data;
+				return 0;
+			}
+		}
+
+	/* Key is not present in the bucket */
+	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+			 bucket_prev = bucket, bucket = bucket->next)
+		for (i = 0; i < 4; i++) {
+			uint64_t bucket_signature = bucket->signature[i];
+			uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+			if (bucket_signature == 0) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				bucket->signature[i] = signature;
+				memcpy(bucket_key, key, f->key_size);
+				memcpy(bucket_data, entry, f->entry_size);
+				*key_found = 0;
+				*entry_ptr = (void *) bucket_data;
+
+				return 0;
+			}
+		}
+
+	/* Bucket full: extend bucket */
+	if (f->stack_pos > 0) {
+		bucket_index = f->stack[--f->stack_pos];
+
+		bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets +
+			bucket_index) * f->bucket_size];
+		bucket_prev->next = bucket;
+		bucket_prev->next_valid = 1;
+
+		bucket->signature[0] = signature;
+		memcpy(bucket->key[0], key, f->key_size);
+		memcpy(&bucket->data[0], entry, f->entry_size);
+		*key_found = 0;
+		*entry_ptr = (void *) &bucket->data[0];
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int
+rte_table_hash_entry_delete_key16_ext(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket0 = (struct rte_bucket_4_16 *)
+		&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+		bucket_prev = bucket, bucket = bucket->next)
+		for (i = 0; i < 4; i++) {
+			uint64_t bucket_signature = bucket->signature[i];
+			uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+			if ((bucket_signature == signature) &&
+				(memcmp(key, bucket_key, f->key_size) == 0)) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				bucket->signature[i] = 0;
+				*key_found = 1;
+				if (entry)
+					memcpy(entry, bucket_data,
+					f->entry_size);
+
+				if ((bucket->signature[0] == 0) &&
+					(bucket->signature[1] == 0) &&
+					(bucket->signature[2] == 0) &&
+					(bucket->signature[3] == 0) &&
+					(bucket_prev != NULL)) {
+					bucket_prev->next = bucket->next;
+					bucket_prev->next_valid =
+						bucket->next_valid;
+
+					memset(bucket, 0,
+						sizeof(struct rte_bucket_4_16));
+					bucket_index = (((uint8_t *)bucket -
+						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
+					f->stack[f->stack_pos++] = bucket_index;
+				}
+
+				return 0;
+			}
+		}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+#define lookup_key16_cmp(key_in, bucket, pos)			\
+{								\
+	uint64_t xor[4][2], or[4], signature[4];		\
+								\
+	signature[0] = (~bucket->signature[0]) & 1;		\
+	signature[1] = (~bucket->signature[1]) & 1;		\
+	signature[2] = (~bucket->signature[2]) & 1;		\
+	signature[3] = (~bucket->signature[3]) & 1;		\
+								\
+	xor[0][0] = key_in[0] ^	 bucket->key[0][0];		\
+	xor[0][1] = key_in[1] ^	 bucket->key[0][1];		\
+								\
+	xor[1][0] = key_in[0] ^	 bucket->key[1][0];		\
+	xor[1][1] = key_in[1] ^	 bucket->key[1][1];		\
+								\
+	xor[2][0] = key_in[0] ^	 bucket->key[2][0];		\
+	xor[2][1] = key_in[1] ^	 bucket->key[2][1];		\
+								\
+	xor[3][0] = key_in[0] ^	 bucket->key[3][0];		\
+	xor[3][1] = key_in[1] ^	 bucket->key[3][1];		\
+								\
+	or[0] = xor[0][0] | xor[0][1] | signature[0];		\
+	or[1] = xor[1][0] | xor[1][1] | signature[1];		\
+	or[2] = xor[2][0] | xor[2][1] | signature[2];		\
+	or[3] = xor[3][0] | xor[3][1] | signature[3];		\
+								\
+	pos = 4;						\
+	if (or[0] == 0)						\
+		pos = 0;					\
+	if (or[1] == 0)						\
+		pos = 1;					\
+	if (or[2] == 0)						\
+		pos = 2;					\
+	if (or[3] == 0)						\
+		pos = 3;					\
+}
+
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
+{								\
+	uint64_t pkt_mask;					\
+	uint32_t key_offset = f->key_offset;\
+								\
+	pkt0_index = __builtin_ctzll(pkts_mask);		\
+	pkt_mask = 1LLU << pkt0_index;				\
+	pkts_mask &= ~pkt_mask;					\
+								\
+	mbuf0 = pkts[pkt0_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
+}
+
+#define lookup1_stage1(mbuf1, bucket1, f)			\
+{								\
+	uint64_t signature;					\
+	uint32_t bucket_index;					\
+								\
+	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
+	bucket_index = signature & (f->n_buckets - 1);		\
+	bucket1 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket_index * f->bucket_size];	\
+	rte_prefetch0(bucket1);					\
+	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
+}
+
+#define lookup1_stage1_dosig(mbuf1, bucket1, f)			\
+{								\
+	uint64_t *key;						\
+	uint64_t signature = 0;				\
+	uint32_t bucket_index;				\
+	uint64_t hash_key_buffer[2];		\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
+								\
+	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
+	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
+	signature = f->f_hash(hash_key_buffer,			\
+			RTE_TABLE_HASH_KEY_SIZE, f->seed);		\
+								\
+	bucket_index = signature & (f->n_buckets - 1);		\
+	bucket1 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket_index * f->bucket_size];	\
+	rte_prefetch0(bucket1);					\
+	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
+}
+
+#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
+		pkts_mask_out, entries, f)			\
+{								\
+	void *a;						\
+	uint64_t pkt_mask;					\
+	uint64_t *key;						\
+	uint64_t hash_key_buffer[2];		\
+	uint32_t pos;						\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
+	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
+	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
+								\
+	lookup_key16_cmp(hash_key_buffer, bucket2, pos);	\
+								\
+	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket2->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt2_index] = a;				\
+	lru_update(bucket2, pos);				\
+}
+
+#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \
+	buckets_mask, buckets, keys, f)				\
+{								\
+	struct rte_bucket_4_16 *bucket_next;			\
+	void *a;						\
+	uint64_t pkt_mask, bucket_mask;				\
+	uint64_t *key;						\
+	uint64_t hash_key_buffer[2];		\
+	uint32_t pos;						\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
+	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
+	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
+								\
+	lookup_key16_cmp(hash_key_buffer, bucket2, pos);	\
+								\
+	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket2->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt2_index] = a;				\
+								\
+	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
+	buckets_mask |= bucket_mask;				\
+	bucket_next = bucket2->next;				\
+	buckets[pkt2_index] = bucket_next;			\
+	keys[pkt2_index] = key;					\
+}
+
+#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
+	buckets_mask, f)					\
+{								\
+	struct rte_bucket_4_16 *bucket, *bucket_next;		\
+	void *a;						\
+	uint64_t pkt_mask, bucket_mask;				\
+	uint64_t *key;						\
+	uint64_t hash_key_buffer[2];		\
+	uint32_t pos;						\
+								\
+	bucket = buckets[pkt_index];				\
+	key = keys[pkt_index];					\
+	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
+	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
+								\
+	lookup_key16_cmp(hash_key_buffer, bucket, pos);	\
+								\
+	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt_index] = a;					\
+								\
+	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
+	buckets_mask |= bucket_mask;				\
+	bucket_next = bucket->next;				\
+	rte_prefetch0(bucket_next);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
+	buckets[pkt_index] = bucket_next;			\
+	keys[pkt_index] = key;					\
+}
+
+#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
+		pkts, pkts_mask, f)				\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+								\
+	mbuf00 = pkts[pkt00_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+								\
+	mbuf01 = pkts[pkt01_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
+		mbuf00, mbuf01, pkts, pkts_mask, f)		\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+								\
+	mbuf00 = pkts[pkt00_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	if (pkts_mask == 0)					\
+		pkt01_index = pkt00_index;			\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+								\
+	mbuf01 = pkts[pkt01_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));	\
+}
+
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
+{								\
+	uint64_t signature10, signature11;			\
+	uint32_t bucket10_index, bucket11_index;		\
+								\
+	signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
+	bucket10_index = signature10 & (f->n_buckets - 1);	\
+	bucket10 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket10_index * f->bucket_size];	\
+	rte_prefetch0(bucket10);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
+								\
+	signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
+	bucket11_index = signature11 & (f->n_buckets - 1);	\
+	bucket11 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket11_index * f->bucket_size];	\
+	rte_prefetch0(bucket11);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
+}
+
+#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)	\
+{								\
+	uint64_t *key10, *key11;					\
+	uint64_t hash_offset_buffer[2];				\
+	uint64_t signature10, signature11;			\
+	uint32_t bucket10_index, bucket11_index;	\
+								\
+	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\
+	hash_offset_buffer[0] = key10[0] & f->key_mask[0];	\
+	hash_offset_buffer[1] = key10[1] & f->key_mask[1];	\
+	signature10 = f->f_hash(hash_offset_buffer,			\
+			RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+	bucket10_index = signature10 & (f->n_buckets - 1);	\
+	bucket10 = (struct rte_bucket_4_16 *)				\
+		&f->memory[bucket10_index * f->bucket_size];	\
+	rte_prefetch0(bucket10);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
+								\
+	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\
+	hash_offset_buffer[0] = key11[0] & f->key_mask[0];	\
+	hash_offset_buffer[1] = key11[1] & f->key_mask[1];	\
+	signature11 = f->f_hash(hash_offset_buffer,			\
+			RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+	bucket11_index = signature11 & (f->n_buckets - 1);	\
+	bucket11 = (struct rte_bucket_4_16 *)			\
+		&f->memory[bucket11_index * f->bucket_size];	\
+	rte_prefetch0(bucket11);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
+}
+
+#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
+		bucket20, bucket21, pkts_mask_out, entries, f)	\
+{								\
+	void *a20, *a21;					\
+	uint64_t pkt20_mask, pkt21_mask;			\
+	uint64_t *key20, *key21;				\
+	uint64_t hash_key_buffer20[2];			\
+	uint64_t hash_key_buffer21[2];			\
+	uint32_t pos20, pos21;					\
+								\
+	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
+	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
+	hash_key_buffer20[0] = key20[0] & f->key_mask[0];	\
+	hash_key_buffer20[1] = key20[1] & f->key_mask[1];	\
+	hash_key_buffer21[0] = key21[0] & f->key_mask[0];	\
+	hash_key_buffer21[1] = key21[1] & f->key_mask[1];	\
+								\
+	lookup_key16_cmp(hash_key_buffer20, bucket20, pos20);	\
+	lookup_key16_cmp(hash_key_buffer21, bucket21, pos21);	\
+								\
+	pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
+	pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
+	pkts_mask_out |= pkt20_mask | pkt21_mask;			\
+								\
+	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
+	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
+	rte_prefetch0(a20);					\
+	rte_prefetch0(a21);					\
+	entries[pkt20_index] = a20;				\
+	entries[pkt21_index] = a21;				\
+	lru_update(bucket20, pos20);				\
+	lru_update(bucket21, pos21);				\
+}
+
+#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
+	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
+{								\
+	struct rte_bucket_4_16 *bucket20_next, *bucket21_next;	\
+	void *a20, *a21;					\
+	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
+	uint64_t *key20, *key21;				\
+	uint64_t hash_key_buffer20[2];			\
+	uint64_t hash_key_buffer21[2];			\
+	uint32_t pos20, pos21;					\
+								\
+	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
+	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
+	hash_key_buffer20[0] = key20[0] & f->key_mask[0];	\
+	hash_key_buffer20[1] = key20[1] & f->key_mask[1];	\
+	hash_key_buffer21[0] = key21[0] & f->key_mask[0];	\
+	hash_key_buffer21[1] = key21[1] & f->key_mask[1];	\
+								\
+	lookup_key16_cmp(hash_key_buffer20, bucket20, pos20);	\
+	lookup_key16_cmp(hash_key_buffer21, bucket21, pos21);	\
+								\
+	pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
+	pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
+	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
+								\
+	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
+	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
+	rte_prefetch0(a20);					\
+	rte_prefetch0(a21);					\
+	entries[pkt20_index] = a20;				\
+	entries[pkt21_index] = a21;				\
+								\
+	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
+	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
+	buckets_mask |= bucket20_mask | bucket21_mask;		\
+	bucket20_next = bucket20->next;				\
+	bucket21_next = bucket21->next;				\
+	buckets[pkt20_index] = bucket20_next;			\
+	buckets[pkt21_index] = bucket21_next;			\
+	keys[pkt20_index] = key20;				\
+	keys[pkt21_index] = key21;				\
+}
+
+static int
+rte_table_hash_lookup_key16_lru(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_16 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1(mbuf, bucket, f);
+			lookup1_stage2_lru(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, f);
+		}
+
+		*lookup_hit_mask = pkts_mask_out;
+		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f,
+			n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		return 0;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key16_lru() */
+
+static int
+rte_table_hash_lookup_key16_lru_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_16 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1_dosig(mbuf, bucket, f);
+			lookup1_stage2_lru(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, f);
+		}
+
+		*lookup_hit_mask = pkts_mask_out;
+		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+			__builtin_popcountll(pkts_mask_out));
+		return 0;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key16_lru_dosig() */
+
+static int
+rte_table_hash_lookup_key16_ext(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0, buckets_mask = 0;
+	struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_16 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1(mbuf, bucket, f);
+			lookup1_stage2_ext(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, buckets_mask,
+				buckets, keys, f);
+		}
+
+		goto grind_next_buckets;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries,
+			buckets_mask, buckets, keys, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+	/* Grind next buckets */
+	for ( ; buckets_mask; ) {
+		uint64_t buckets_mask_next = 0;
+
+		for ( ; buckets_mask; ) {
+			uint64_t pkt_mask;
+			uint32_t pkt_index;
+
+			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_mask = 1LLU << pkt_index;
+			buckets_mask &= ~pkt_mask;
+
+			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+				entries, buckets_mask_next, f);
+		}
+
+		buckets_mask = buckets_mask_next;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key16_ext() */
+
+static int
+rte_table_hash_lookup_key16_ext_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0, buckets_mask = 0;
+	struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_16 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1_dosig(mbuf, bucket, f);
+			lookup1_stage2_ext(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, buckets_mask,
+				buckets, keys, f);
+		}
+
+		goto grind_next_buckets;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries,
+			buckets_mask, buckets, keys, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+	/* Grind next buckets */
+	for ( ; buckets_mask; ) {
+		uint64_t buckets_mask_next = 0;
+
+		for ( ; buckets_mask; ) {
+			uint64_t pkt_mask;
+			uint32_t pkt_index;
+
+			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_mask = 1LLU << pkt_index;
+			buckets_mask &= ~pkt_mask;
+
+			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+				entries, buckets_mask_next, f);
+		}
+
+		buckets_mask = buckets_mask_next;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+		__builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key16_ext_dosig() */
+
+static int
+rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_hash_key16_lru_ops = {
+	.f_create = rte_table_hash_create_key16_lru,
+	.f_free = rte_table_hash_free_key16_lru,
+	.f_add = rte_table_hash_entry_add_key16_lru,
+	.f_delete = rte_table_hash_entry_delete_key16_lru,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key16_lru,
+	.f_stats = rte_table_hash_key16_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = {
+	.f_create = rte_table_hash_create_key16_lru,
+	.f_free = rte_table_hash_free_key16_lru,
+	.f_add = rte_table_hash_entry_add_key16_lru,
+	.f_delete = rte_table_hash_entry_delete_key16_lru,
+	.f_lookup = rte_table_hash_lookup_key16_lru_dosig,
+	.f_stats = rte_table_hash_key16_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key16_ext_ops = {
+	.f_create = rte_table_hash_create_key16_ext,
+	.f_free = rte_table_hash_free_key16_ext,
+	.f_add = rte_table_hash_entry_add_key16_ext,
+	.f_delete = rte_table_hash_entry_delete_key16_ext,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key16_ext,
+	.f_stats = rte_table_hash_key16_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = {
+	.f_create = rte_table_hash_create_key16_ext,
+	.f_free = rte_table_hash_free_key16_ext,
+	.f_add = rte_table_hash_entry_add_key16_ext,
+	.f_delete = rte_table_hash_entry_delete_key16_ext,
+	.f_lookup = rte_table_hash_lookup_key16_ext_dosig,
+	.f_stats = rte_table_hash_key16_stats_read,
+};
diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c
new file mode 100644
index 00000000..a7aba492
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_key32.c
@@ -0,0 +1,1144 @@
+/*-
+ *	 BSD LICENSE
+ *
+ *	 Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *	 All rights reserved.
+ *
+ *	 Redistribution and use in source and binary forms, with or without
+ *	 modification, are permitted provided that the following conditions
+ *	 are met:
+ *
+ *	* Redistributions of source code must retain the above copyright
+ *		 notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above copyright
+ *		 notice, this list of conditions and the following disclaimer in
+ *		 the documentation and/or other materials provided with the
+ *		 distribution.
+ *	* Neither the name of Intel Corporation nor the names of its
+ *		 contributors may be used to endorse or promote products derived
+ *		 from this software without specific prior written permission.
+ *
+ *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+#define RTE_TABLE_HASH_KEY_SIZE						32
+
+#define RTE_BUCKET_ENTRY_VALID						0x1LLU
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_bucket_4_32 {
+	/* Cache line 0 */
+	uint64_t signature[4 + 1];
+	uint64_t lru_list;
+	struct rte_bucket_4_32 *next;
+	uint64_t next_valid;
+
+	/* Cache lines 1 and 2 */
+	uint64_t key[4][4];
+
+	/* Cache line 3 */
+	uint8_t data[0];
+};
+
+struct rte_table_hash {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t n_buckets;
+	uint32_t n_entries_per_bucket;
+	uint32_t key_size;
+	uint32_t entry_size;
+	uint32_t bucket_size;
+	uint32_t signature_offset;
+	uint32_t key_offset;
+	rte_table_hash_op_hash f_hash;
+	uint64_t seed;
+
+	/* Extendible buckets */
+	uint32_t n_buckets_ext;
+	uint32_t stack_pos;
+	uint32_t *stack;
+
+	/* Lookup table */
+	uint8_t memory[0] __rte_cache_aligned;
+};
+
+static int
+check_params_create_lru(struct rte_table_hash_key32_lru_params *params) {
+	/* n_entries */
+	if (params->n_entries == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_create_key32_lru(void *params,
+		int socket_id,
+		uint32_t entry_size)
+{
+	struct rte_table_hash_key32_lru_params *p =
+		(struct rte_table_hash_key32_lru_params *) params;
+	struct rte_table_hash *f;
+	uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
+	uint32_t total_size, i;
+
+	/* Check input parameters */
+	if ((check_params_create_lru(p) != 0) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
+		return NULL;
+	}
+	n_entries_per_bucket = 4;
+	key_size = 32;
+
+	/* Memory allocation */
+	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
+		n_entries_per_bucket);
+	bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
+		* entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
+	total_size = sizeof(struct rte_table_hash) + n_buckets *
+		bucket_size_cl * RTE_CACHE_LINE_SIZE;
+
+	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE,
+		"%s: Hash table memory footprint is %u bytes\n", __func__,
+		total_size);
+
+	/* Memory initialization */
+	f->n_buckets = n_buckets;
+	f->n_entries_per_bucket = n_entries_per_bucket;
+	f->key_size = key_size;
+	f->entry_size = entry_size;
+	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
+	f->signature_offset = p->signature_offset;
+	f->key_offset = p->key_offset;
+	f->f_hash = p->f_hash;
+	f->seed = p->seed;
+
+	for (i = 0; i < n_buckets; i++) {
+		struct rte_bucket_4_32 *bucket;
+
+		bucket = (struct rte_bucket_4_32 *) &f->memory[i *
+			f->bucket_size];
+		bucket->lru_list = 0x0000000100020003LLU;
+	}
+
+	return f;
+}
+
+static int
+rte_table_hash_free_key32_lru(void *table)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(f);
+	return 0;
+}
+
+static int
+rte_table_hash_entry_add_key32_lru(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_32 *bucket;
+	uint64_t signature, pos;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket = (struct rte_bucket_4_32 *)
+		&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (i = 0; i < 4; i++) {
+		uint64_t bucket_signature = bucket->signature[i];
+		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+		if ((bucket_signature == signature) &&
+			(memcmp(key, bucket_key, f->key_size) == 0)) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			memcpy(bucket_data, entry, f->entry_size);
+			lru_update(bucket, i);
+			*key_found = 1;
+			*entry_ptr = (void *) bucket_data;
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	for (i = 0; i < 4; i++) {
+		uint64_t bucket_signature = bucket->signature[i];
+		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+		if (bucket_signature == 0) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			bucket->signature[i] = signature;
+			memcpy(bucket_key, key, f->key_size);
+			memcpy(bucket_data, entry, f->entry_size);
+			lru_update(bucket, i);
+			*key_found = 0;
+			*entry_ptr = (void *) bucket_data;
+
+			return 0;
+		}
+	}
+
+	/* Bucket full: replace LRU entry */
+	pos = lru_pos(bucket);
+	bucket->signature[pos] = signature;
+	memcpy(bucket->key[pos], key, f->key_size);
+	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
+	lru_update(bucket, pos);
+	*key_found	= 0;
+	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];
+
+	return 0;
+}
+
+static int
+rte_table_hash_entry_delete_key32_lru(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_32 *bucket;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket = (struct rte_bucket_4_32 *)
+		&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (i = 0; i < 4; i++) {
+		uint64_t bucket_signature = bucket->signature[i];
+		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+		if ((bucket_signature == signature) &&
+			(memcmp(key, bucket_key, f->key_size) == 0)) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			bucket->signature[i] = 0;
+			*key_found = 1;
+			if (entry)
+				memcpy(entry, bucket_data, f->entry_size);
+
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+static int
+check_params_create_ext(struct rte_table_hash_key32_ext_params *params) {
+	/* n_entries */
+	if (params->n_entries == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_entries_ext */
+	if (params->n_entries_ext == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_create_key32_ext(void *params,
+	int socket_id,
+	uint32_t entry_size)
+{
+	struct rte_table_hash_key32_ext_params *p =
+			(struct rte_table_hash_key32_ext_params *) params;
+	struct rte_table_hash *f;
+	uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket;
+	uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i;
+
+	/* Check input parameters */
+	if ((check_params_create_ext(p) != 0) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
+		return NULL;
+
+	n_entries_per_bucket = 4;
+	key_size = 32;
+
+	/* Memory allocation */
+	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
+		n_entries_per_bucket);
+	n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
+		n_entries_per_bucket;
+	bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
+		* entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
+	stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
+		/ RTE_CACHE_LINE_SIZE;
+	total_size = sizeof(struct rte_table_hash) +
+		((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
+		RTE_CACHE_LINE_SIZE;
+
+	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE,
+		"%s: Hash table memory footprint is %u bytes\n", __func__,
+		total_size);
+
+	/* Memory initialization */
+	f->n_buckets = n_buckets;
+	f->n_entries_per_bucket = n_entries_per_bucket;
+	f->key_size = key_size;
+	f->entry_size = entry_size;
+	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
+	f->signature_offset = p->signature_offset;
+	f->key_offset = p->key_offset;
+	f->f_hash = p->f_hash;
+	f->seed = p->seed;
+
+	f->n_buckets_ext = n_buckets_ext;
+	f->stack_pos = n_buckets_ext;
+	f->stack = (uint32_t *)
+		&f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+
+	for (i = 0; i < n_buckets_ext; i++)
+		f->stack[i] = i;
+
+	return f;
+}
+
+static int
+rte_table_hash_free_key32_ext(void *table)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(f);
+	return 0;
+}
+
+static int
+rte_table_hash_entry_add_key32_ext(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket0 = (struct rte_bucket_4_32 *)
+			&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
+		for (i = 0; i < 4; i++) {
+			uint64_t bucket_signature = bucket->signature[i];
+			uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+			if ((bucket_signature == signature) &&
+				(memcmp(key, bucket_key, f->key_size) == 0)) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				memcpy(bucket_data, entry, f->entry_size);
+				*key_found = 1;
+				*entry_ptr = (void *) bucket_data;
+
+				return 0;
+			}
+		}
+	}
+
+	/* Key is not present in the bucket */
+	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+		bucket_prev = bucket, bucket = bucket->next)
+		for (i = 0; i < 4; i++) {
+			uint64_t bucket_signature = bucket->signature[i];
+			uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+			if (bucket_signature == 0) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				bucket->signature[i] = signature;
+				memcpy(bucket_key, key, f->key_size);
+				memcpy(bucket_data, entry, f->entry_size);
+				*key_found = 0;
+				*entry_ptr = (void *) bucket_data;
+
+				return 0;
+			}
+		}
+
+	/* Bucket full: extend bucket */
+	if (f->stack_pos > 0) {
+		bucket_index = f->stack[--f->stack_pos];
+
+		bucket = (struct rte_bucket_4_32 *)
+			&f->memory[(f->n_buckets + bucket_index) *
+			f->bucket_size];
+		bucket_prev->next = bucket;
+		bucket_prev->next_valid = 1;
+
+		bucket->signature[0] = signature;
+		memcpy(bucket->key[0], key, f->key_size);
+		memcpy(&bucket->data[0], entry, f->entry_size);
+		*key_found = 0;
+		*entry_ptr = (void *) &bucket->data[0];
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int
+rte_table_hash_entry_delete_key32_ext(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket0 = (struct rte_bucket_4_32 *)
+		&f->memory[bucket_index * f->bucket_size];
+	signature |= RTE_BUCKET_ENTRY_VALID;
+
+	/* Key is present in the bucket */
+	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+		bucket_prev = bucket, bucket = bucket->next)
+		for (i = 0; i < 4; i++) {
+			uint64_t bucket_signature = bucket->signature[i];
+			uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+			if ((bucket_signature == signature) &&
+				(memcmp(key, bucket_key, f->key_size) == 0)) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				bucket->signature[i] = 0;
+				*key_found = 1;
+				if (entry)
+					memcpy(entry, bucket_data,
+						f->entry_size);
+
+				if ((bucket->signature[0] == 0) &&
+						(bucket->signature[1] == 0) &&
+						(bucket->signature[2] == 0) &&
+						(bucket->signature[3] == 0) &&
+						(bucket_prev != NULL)) {
+					bucket_prev->next = bucket->next;
+					bucket_prev->next_valid =
+						bucket->next_valid;
+
+					memset(bucket, 0,
+						sizeof(struct rte_bucket_4_32));
+					bucket_index = (((uint8_t *)bucket -
+						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
+					f->stack[f->stack_pos++] = bucket_index;
+				}
+
+				return 0;
+			}
+		}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+#define lookup_key32_cmp(key_in, bucket, pos)			\
+{								\
+	uint64_t xor[4][4], or[4], signature[4];		\
+								\
+	signature[0] = ((~bucket->signature[0]) & 1);		\
+	signature[1] = ((~bucket->signature[1]) & 1);		\
+	signature[2] = ((~bucket->signature[2]) & 1);		\
+	signature[3] = ((~bucket->signature[3]) & 1);		\
+								\
+	xor[0][0] = key_in[0] ^	 bucket->key[0][0];		\
+	xor[0][1] = key_in[1] ^	 bucket->key[0][1];		\
+	xor[0][2] = key_in[2] ^	 bucket->key[0][2];		\
+	xor[0][3] = key_in[3] ^	 bucket->key[0][3];		\
+								\
+	xor[1][0] = key_in[0] ^	 bucket->key[1][0];		\
+	xor[1][1] = key_in[1] ^	 bucket->key[1][1];		\
+	xor[1][2] = key_in[2] ^	 bucket->key[1][2];		\
+	xor[1][3] = key_in[3] ^	 bucket->key[1][3];		\
+								\
+	xor[2][0] = key_in[0] ^	 bucket->key[2][0];		\
+	xor[2][1] = key_in[1] ^	 bucket->key[2][1];		\
+	xor[2][2] = key_in[2] ^	 bucket->key[2][2];		\
+	xor[2][3] = key_in[3] ^	 bucket->key[2][3];		\
+								\
+	xor[3][0] = key_in[0] ^	 bucket->key[3][0];		\
+	xor[3][1] = key_in[1] ^	 bucket->key[3][1];		\
+	xor[3][2] = key_in[2] ^	 bucket->key[3][2];		\
+	xor[3][3] = key_in[3] ^	 bucket->key[3][3];		\
+								\
+	or[0] = xor[0][0] | xor[0][1] | xor[0][2] | xor[0][3] | signature[0];\
+	or[1] = xor[1][0] | xor[1][1] | xor[1][2] | xor[1][3] | signature[1];\
+	or[2] = xor[2][0] | xor[2][1] | xor[2][2] | xor[2][3] | signature[2];\
+	or[3] = xor[3][0] | xor[3][1] | xor[3][2] | xor[3][3] | signature[3];\
+								\
+	pos = 4;						\
+	if (or[0] == 0)						\
+		pos = 0;					\
+	if (or[1] == 0)						\
+		pos = 1;					\
+	if (or[2] == 0)						\
+		pos = 2;					\
+	if (or[3] == 0)						\
+		pos = 3;					\
+}
+
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
+{								\
+	uint64_t pkt_mask;					\
+	uint32_t key_offset = f->key_offset;	\
+								\
+	pkt0_index = __builtin_ctzll(pkts_mask);		\
+	pkt_mask = 1LLU << pkt0_index;				\
+	pkts_mask &= ~pkt_mask;					\
+								\
+	mbuf0 = pkts[pkt0_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
+}
+
+#define lookup1_stage1(mbuf1, bucket1, f)			\
+{								\
+	uint64_t signature;					\
+	uint32_t bucket_index;					\
+								\
+	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
+	bucket_index = signature & (f->n_buckets - 1);		\
+	bucket1 = (struct rte_bucket_4_32 *)			\
+		&f->memory[bucket_index * f->bucket_size];	\
+	rte_prefetch0(bucket1);					\
+	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
+	rte_prefetch0((void *)(((uintptr_t) bucket1) + 2 * RTE_CACHE_LINE_SIZE));\
+}
+
+#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
+	pkts_mask_out, entries, f)				\
+{								\
+	void *a;						\
+	uint64_t pkt_mask;					\
+	uint64_t *key;						\
+	uint32_t pos;						\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
+								\
+	lookup_key32_cmp(key, bucket2, pos);			\
+								\
+	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket2->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt2_index] = a;				\
+	lru_update(bucket2, pos);				\
+}
+
+#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
+	entries, buckets_mask, buckets, keys, f)		\
+{								\
+	struct rte_bucket_4_32 *bucket_next;			\
+	void *a;						\
+	uint64_t pkt_mask, bucket_mask;				\
+	uint64_t *key;						\
+	uint32_t pos;						\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
+								\
+	lookup_key32_cmp(key, bucket2, pos);			\
+								\
+	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket2->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt2_index] = a;				\
+								\
+	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
+	buckets_mask |= bucket_mask;				\
+	bucket_next = bucket2->next;				\
+	buckets[pkt2_index] = bucket_next;			\
+	keys[pkt2_index] = key;					\
+}
+
+#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,	\
+	entries, buckets_mask, f)				\
+{								\
+	struct rte_bucket_4_32 *bucket, *bucket_next;		\
+	void *a;						\
+	uint64_t pkt_mask, bucket_mask;				\
+	uint64_t *key;						\
+	uint32_t pos;						\
+								\
+	bucket = buckets[pkt_index];				\
+	key = keys[pkt_index];					\
+								\
+	lookup_key32_cmp(key, bucket, pos);			\
+								\
+	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt_index] = a;					\
+								\
+	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
+	buckets_mask |= bucket_mask;				\
+	bucket_next = bucket->next;				\
+	rte_prefetch0(bucket_next);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
+	rte_prefetch0((void *)(((uintptr_t) bucket_next) +	\
+		2 * RTE_CACHE_LINE_SIZE));				\
+	buckets[pkt_index] = bucket_next;			\
+	keys[pkt_index] = key;					\
+}
+
+#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
+	pkts, pkts_mask, f)					\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+								\
+	mbuf00 = pkts[pkt00_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+								\
+	mbuf01 = pkts[pkt01_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
+	mbuf00, mbuf01, pkts, pkts_mask, f)			\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+								\
+	mbuf00 = pkts[pkt00_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	if (pkts_mask == 0)					\
+		pkt01_index = pkt00_index;			\
+								\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+								\
+	mbuf01 = pkts[pkt01_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));	\
+}
+
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
+{								\
+	uint64_t signature10, signature11;			\
+	uint32_t bucket10_index, bucket11_index;		\
+								\
+	signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
+	bucket10_index = signature10 & (f->n_buckets - 1);	\
+	bucket10 = (struct rte_bucket_4_32 *)			\
+		&f->memory[bucket10_index * f->bucket_size];	\
+	rte_prefetch0(bucket10);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
+	rte_prefetch0((void *)(((uintptr_t) bucket10) + 2 * RTE_CACHE_LINE_SIZE));\
+								\
+	signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
+	bucket11_index = signature11 & (f->n_buckets - 1);	\
+	bucket11 = (struct rte_bucket_4_32 *)			\
+		&f->memory[bucket11_index * f->bucket_size];	\
+	rte_prefetch0(bucket11);				\
+	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
+	rte_prefetch0((void *)(((uintptr_t) bucket11) + 2 * RTE_CACHE_LINE_SIZE));\
+}
+
+#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
+	bucket20, bucket21, pkts_mask_out, entries, f)		\
+{								\
+	void *a20, *a21;					\
+	uint64_t pkt20_mask, pkt21_mask;			\
+	uint64_t *key20, *key21;				\
+	uint32_t pos20, pos21;					\
+								\
+	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
+	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
+								\
+	lookup_key32_cmp(key20, bucket20, pos20);		\
+	lookup_key32_cmp(key21, bucket21, pos21);		\
+								\
+	pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
+	pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
+	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
+								\
+	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
+	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
+	rte_prefetch0(a20);					\
+	rte_prefetch0(a21);					\
+	entries[pkt20_index] = a20;				\
+	entries[pkt21_index] = a21;				\
+	lru_update(bucket20, pos20);				\
+	lru_update(bucket21, pos21);				\
+}
+
+#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
+	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
+{								\
+	struct rte_bucket_4_32 *bucket20_next, *bucket21_next;	\
+	void *a20, *a21;					\
+	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
+	uint64_t *key20, *key21;				\
+	uint32_t pos20, pos21;					\
+								\
+	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
+	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
+								\
+	lookup_key32_cmp(key20, bucket20, pos20);		\
+	lookup_key32_cmp(key21, bucket21, pos21);		\
+								\
+	pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
+	pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
+	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
+								\
+	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
+	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
+	rte_prefetch0(a20);					\
+	rte_prefetch0(a21);					\
+	entries[pkt20_index] = a20;				\
+	entries[pkt21_index] = a21;				\
+								\
+	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
+	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
+	buckets_mask |= bucket20_mask | bucket21_mask;		\
+	bucket20_next = bucket20->next;				\
+	bucket21_next = bucket21->next;				\
+	buckets[pkt20_index] = bucket20_next;			\
+	buckets[pkt21_index] = bucket21_next;			\
+	keys[pkt20_index] = key20;				\
+	keys[pkt21_index] = key21;				\
+}
+
+static int
+rte_table_hash_lookup_key32_lru(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_32 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1(mbuf, bucket, f);
+			lookup1_stage2_lru(pkt_index, mbuf, bucket,
+					pkts_mask_out, entries, f);
+		}
+
+		*lookup_hit_mask = pkts_mask_out;
+		RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		return 0;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_lru(pkt20_index, pkt21_index,
+			mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out,
+			entries, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index,
+		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index,
+		mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f);
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key32_lru() */
+
+static int
+rte_table_hash_lookup_key32_ext(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0, buckets_mask = 0;
+	struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_32 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1(mbuf, bucket, f);
+			lookup1_stage2_ext(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, buckets_mask, buckets,
+				keys, f);
+		}
+
+		goto grind_next_buckets;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries,
+			buckets_mask, buckets, keys, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+	/* Grind next buckets */
+	for ( ; buckets_mask; ) {
+		uint64_t buckets_mask_next = 0;
+
+		for ( ; buckets_mask; ) {
+			uint64_t pkt_mask;
+			uint32_t pkt_index;
+
+			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_mask = 1LLU << pkt_index;
+			buckets_mask &= ~pkt_mask;
+
+			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+				entries, buckets_mask_next, f);
+		}
+
+		buckets_mask = buckets_mask_next;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key32_ext() */
+
+static int
+rte_table_hash_key32_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_hash_key32_lru_ops = {
+	.f_create = rte_table_hash_create_key32_lru,
+	.f_free = rte_table_hash_free_key32_lru,
+	.f_add = rte_table_hash_entry_add_key32_lru,
+	.f_delete = rte_table_hash_entry_delete_key32_lru,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key32_lru,
+	.f_stats = rte_table_hash_key32_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key32_ext_ops = {
+	.f_create = rte_table_hash_create_key32_ext,
+	.f_free = rte_table_hash_free_key32_ext,
+	.f_add = rte_table_hash_entry_add_key32_ext,
+	.f_delete = rte_table_hash_entry_delete_key32_ext,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key32_ext,
+	.f_stats = rte_table_hash_key32_stats_read,
+};
diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c
new file mode 100644
index 00000000..e2e2bdc4
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_key8.c
@@ -0,0 +1,1471 @@
+/*-
+ *	 BSD LICENSE
+ *
+ *	 Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *	 All rights reserved.
+ *
+ *	 Redistribution and use in source and binary forms, with or without
+ *	 modification, are permitted provided that the following conditions
+ *	 are met:
+ *
+ *	* Redistributions of source code must retain the above copyright
+ *		 notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above copyright
+ *		 notice, this list of conditions and the following disclaimer in
+ *		 the documentation and/or other materials provided with the
+ *		 distribution.
+ *	* Neither the name of Intel Corporation nor the names of its
+ *		 contributors may be used to endorse or promote products derived
+ *		 from this software without specific prior written permission.
+ *
+ *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+#define RTE_TABLE_HASH_KEY_SIZE						8
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_bucket_4_8 {
+	/* Cache line 0 */
+	uint64_t signature;
+	uint64_t lru_list;
+	struct rte_bucket_4_8 *next;
+	uint64_t next_valid;
+
+	uint64_t key[4];
+
+	/* Cache line 1 */
+	uint8_t data[0];
+};
+
+struct rte_table_hash {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t n_buckets;
+	uint32_t n_entries_per_bucket;
+	uint32_t key_size;
+	uint32_t entry_size;
+	uint32_t bucket_size;
+	uint32_t signature_offset;
+	uint32_t key_offset;
+	uint64_t key_mask;
+	rte_table_hash_op_hash f_hash;
+	uint64_t seed;
+
+	/* Extendible buckets */
+	uint32_t n_buckets_ext;
+	uint32_t stack_pos;
+	uint32_t *stack;
+
+	/* Lookup table */
+	uint8_t memory[0] __rte_cache_aligned;
+};
+
+static int
+check_params_create_lru(struct rte_table_hash_key8_lru_params *params) {
+	/* n_entries */
+	if (params->n_entries == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_hash_key8_lru_params *p =
+		(struct rte_table_hash_key8_lru_params *) params;
+	struct rte_table_hash *f;
+	uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
+	uint32_t total_size, i;
+
+	/* Check input parameters */
+	if ((check_params_create_lru(p) != 0) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) {
+		return NULL;
+	}
+	n_entries_per_bucket = 4;
+	key_size = 8;
+
+	/* Memory allocation */
+	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
+		n_entries_per_bucket);
+	bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket *
+		entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
+	total_size = sizeof(struct rte_table_hash) + n_buckets *
+		bucket_size_cl * RTE_CACHE_LINE_SIZE;
+
+	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE,
+		"%s: Hash table memory footprint is %u bytes\n",
+		__func__, total_size);
+
+	/* Memory initialization */
+	f->n_buckets = n_buckets;
+	f->n_entries_per_bucket = n_entries_per_bucket;
+	f->key_size = key_size;
+	f->entry_size = entry_size;
+	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
+	f->signature_offset = p->signature_offset;
+	f->key_offset = p->key_offset;
+	f->f_hash = p->f_hash;
+	f->seed = p->seed;
+
+	if (p->key_mask != NULL)
+		f->key_mask = ((uint64_t *)p->key_mask)[0];
+	else
+		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;
+
+	for (i = 0; i < n_buckets; i++) {
+		struct rte_bucket_4_8 *bucket;
+
+		bucket = (struct rte_bucket_4_8 *) &f->memory[i *
+			f->bucket_size];
+		bucket->lru_list = 0x0000000100020003LLU;
+	}
+
+	return f;
+}
+
+static int
+rte_table_hash_free_key8_lru(void *table)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(f);
+	return 0;
+}
+
+static int
+rte_table_hash_entry_add_key8_lru(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket;
+	uint64_t signature, mask, pos;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket = (struct rte_bucket_4_8 *)
+		&f->memory[bucket_index * f->bucket_size];
+
+	/* Key is present in the bucket */
+	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
+		uint64_t bucket_signature = bucket->signature;
+		uint64_t bucket_key = bucket->key[i];
+
+		if ((bucket_signature & mask) &&
+		    (*((uint64_t *) key) == bucket_key)) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			memcpy(bucket_data, entry, f->entry_size);
+			lru_update(bucket, i);
+			*key_found = 1;
+			*entry_ptr = (void *) bucket_data;
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
+		uint64_t bucket_signature = bucket->signature;
+
+		if ((bucket_signature & mask) == 0) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			bucket->signature |= mask;
+			bucket->key[i] = *((uint64_t *) key);
+			memcpy(bucket_data, entry, f->entry_size);
+			lru_update(bucket, i);
+			*key_found = 0;
+			*entry_ptr = (void *) bucket_data;
+
+			return 0;
+		}
+	}
+
+	/* Bucket full: replace LRU entry */
+	pos = lru_pos(bucket);
+	bucket->key[pos] = *((uint64_t *) key);
+	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
+	lru_update(bucket, pos);
+	*key_found	= 0;
+	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];
+
+	return 0;
+}
+
+static int
+rte_table_hash_entry_delete_key8_lru(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket;
+	uint64_t signature, mask;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket = (struct rte_bucket_4_8 *)
+		&f->memory[bucket_index * f->bucket_size];
+
+	/* Key is present in the bucket */
+	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
+		uint64_t bucket_signature = bucket->signature;
+		uint64_t bucket_key = bucket->key[i];
+
+		if ((bucket_signature & mask) &&
+		    (*((uint64_t *) key) == bucket_key)) {
+			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+			bucket->signature &= ~mask;
+			*key_found = 1;
+			if (entry)
+				memcpy(entry, bucket_data, f->entry_size);
+
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+static int
+check_params_create_ext(struct rte_table_hash_key8_ext_params *params) {
+	/* n_entries */
+	if (params->n_entries == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_entries_ext */
+	if (params->n_entries_ext == 0) {
+		RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_hash_key8_ext_params *p =
+		(struct rte_table_hash_key8_ext_params *) params;
+	struct rte_table_hash *f;
+	uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size;
+	uint32_t bucket_size_cl, stack_size_cl, total_size, i;
+
+	/* Check input parameters */
+	if ((check_params_create_ext(p) != 0) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0))
+		return NULL;
+
+	n_entries_per_bucket = 4;
+	key_size = 8;
+
+	/* Memory allocation */
+	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
+		n_entries_per_bucket);
+	n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
+		n_entries_per_bucket;
+	bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket *
+		entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
+	stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
+		/ RTE_CACHE_LINE_SIZE;
+	total_size = sizeof(struct rte_table_hash) + ((n_buckets +
+		n_buckets_ext) * bucket_size_cl + stack_size_cl) *
+		RTE_CACHE_LINE_SIZE;
+
+	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE,
+		"%s: Hash table memory footprint is %u bytes\n",
+		__func__, total_size);
+
+	/* Memory initialization */
+	f->n_buckets = n_buckets;
+	f->n_entries_per_bucket = n_entries_per_bucket;
+	f->key_size = key_size;
+	f->entry_size = entry_size;
+	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
+	f->signature_offset = p->signature_offset;
+	f->key_offset = p->key_offset;
+	f->f_hash = p->f_hash;
+	f->seed = p->seed;
+
+	f->n_buckets_ext = n_buckets_ext;
+	f->stack_pos = n_buckets_ext;
+	f->stack = (uint32_t *)
+		&f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+
+	if (p->key_mask != NULL)
+		f->key_mask = ((uint64_t *)p->key_mask)[0];
+	else
+		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;
+
+	for (i = 0; i < n_buckets_ext; i++)
+		f->stack[i] = i;
+
+	return f;
+}
+
+static int
+rte_table_hash_free_key8_ext(void *table)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (f == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(f);
+	return 0;
+}
+
+static int
+rte_table_hash_entry_add_key8_ext(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket0 = (struct rte_bucket_4_8 *)
+		&f->memory[bucket_index * f->bucket_size];
+
+	/* Key is present in the bucket */
+	for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
+		uint64_t mask;
+
+		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
+			uint64_t bucket_signature = bucket->signature;
+			uint64_t bucket_key = bucket->key[i];
+
+			if ((bucket_signature & mask) &&
+					(*((uint64_t *) key) == bucket_key)) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				memcpy(bucket_data, entry, f->entry_size);
+				*key_found = 1;
+				*entry_ptr = (void *) bucket_data;
+				return 0;
+			}
+		}
+	}
+
+	/* Key is not present in the bucket */
+	for (bucket_prev = NULL, bucket = bucket0;
+		bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
+		uint64_t mask;
+
+		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
+			uint64_t bucket_signature = bucket->signature;
+
+			if ((bucket_signature & mask) == 0) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				bucket->signature |= mask;
+				bucket->key[i] = *((uint64_t *) key);
+				memcpy(bucket_data, entry, f->entry_size);
+				*key_found = 0;
+				*entry_ptr = (void *) bucket_data;
+
+				return 0;
+			}
+		}
+	}
+
+	/* Bucket full: extend bucket */
+	if (f->stack_pos > 0) {
+		bucket_index = f->stack[--f->stack_pos];
+
+		bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
+			bucket_index) * f->bucket_size];
+		bucket_prev->next = bucket;
+		bucket_prev->next_valid = 1;
+
+		bucket->signature = 1;
+		bucket->key[0] = *((uint64_t *) key);
+		memcpy(&bucket->data[0], entry, f->entry_size);
+		*key_found = 0;
+		*entry_ptr = (void *) &bucket->data[0];
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int
+rte_table_hash_entry_delete_key8_ext(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
+	uint64_t signature;
+	uint32_t bucket_index, i;
+
+	signature = f->f_hash(key, f->key_size, f->seed);
+	bucket_index = signature & (f->n_buckets - 1);
+	bucket0 = (struct rte_bucket_4_8 *)
+		&f->memory[bucket_index * f->bucket_size];
+
+	/* Key is present in the bucket */
+	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+		bucket_prev = bucket, bucket = bucket->next) {
+		uint64_t mask;
+
+		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
+			uint64_t bucket_signature = bucket->signature;
+			uint64_t bucket_key = bucket->key[i];
+
+			if ((bucket_signature & mask) &&
+				(*((uint64_t *) key) == bucket_key)) {
+				uint8_t *bucket_data = &bucket->data[i *
+					f->entry_size];
+
+				bucket->signature &= ~mask;
+				*key_found = 1;
+				if (entry)
+					memcpy(entry, bucket_data,
+						f->entry_size);
+
+				if ((bucket->signature == 0) &&
+				    (bucket_prev != NULL)) {
+					bucket_prev->next = bucket->next;
+					bucket_prev->next_valid =
+						bucket->next_valid;
+
+					memset(bucket, 0,
+						sizeof(struct rte_bucket_4_8));
+					bucket_index = (((uint8_t *)bucket -
+						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
+					f->stack[f->stack_pos++] = bucket_index;
+				}
+
+				return 0;
+			}
+		}
+	}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+#define lookup_key8_cmp(key_in, bucket, pos)			\
+{								\
+	uint64_t xor[4], signature;				\
+								\
+	signature = ~bucket->signature;				\
+								\
+	xor[0] = (key_in[0] ^	 bucket->key[0]) | (signature & 1);\
+	xor[1] = (key_in[0] ^	 bucket->key[1]) | (signature & 2);\
+	xor[2] = (key_in[0] ^	 bucket->key[2]) | (signature & 4);\
+	xor[3] = (key_in[0] ^	 bucket->key[3]) | (signature & 8);\
+								\
+	pos = 4;						\
+	if (xor[0] == 0)					\
+		pos = 0;					\
+	if (xor[1] == 0)					\
+		pos = 1;					\
+	if (xor[2] == 0)					\
+		pos = 2;					\
+	if (xor[3] == 0)					\
+		pos = 3;					\
+}
+
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
+{								\
+	uint64_t pkt_mask;					\
+	uint32_t key_offset = f->key_offset;\
+								\
+	pkt0_index = __builtin_ctzll(pkts_mask);		\
+	pkt_mask = 1LLU << pkt0_index;				\
+	pkts_mask &= ~pkt_mask;					\
+								\
+	mbuf0 = pkts[pkt0_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));	\
+}
+
+#define lookup1_stage1(mbuf1, bucket1, f)			\
+{								\
+	uint64_t signature;					\
+	uint32_t bucket_index;					\
+								\
+	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
+	bucket_index = signature & (f->n_buckets - 1);		\
+	bucket1 = (struct rte_bucket_4_8 *)			\
+		&f->memory[bucket_index * f->bucket_size];	\
+	rte_prefetch0(bucket1);					\
+}
+
+#define lookup1_stage1_dosig(mbuf1, bucket1, f)			\
+{								\
+	uint64_t *key;						\
+	uint64_t signature;					\
+	uint32_t bucket_index;					\
+	uint64_t hash_key_buffer;				\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
+	hash_key_buffer = *key & f->key_mask;			\
+	signature = f->f_hash(&hash_key_buffer,			\
+		RTE_TABLE_HASH_KEY_SIZE, f->seed);		\
+	bucket_index = signature & (f->n_buckets - 1);		\
+	bucket1 = (struct rte_bucket_4_8 *)			\
+		&f->memory[bucket_index * f->bucket_size];	\
+	rte_prefetch0(bucket1);					\
+}
+
+#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
+	pkts_mask_out, entries, f)				\
+{								\
+	void *a;						\
+	uint64_t pkt_mask;					\
+	uint64_t *key;						\
+	uint32_t pos;						\
+	uint64_t hash_key_buffer;				\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
+	hash_key_buffer = key[0] & f->key_mask;			\
+								\
+	lookup_key8_cmp((&hash_key_buffer), bucket2, pos);	\
+								\
+	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket2->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt2_index] = a;				\
+	lru_update(bucket2, pos);				\
+}
+
+#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
+	entries, buckets_mask, buckets, keys, f)		\
+{								\
+	struct rte_bucket_4_8 *bucket_next;			\
+	void *a;						\
+	uint64_t pkt_mask, bucket_mask;				\
+	uint64_t *key;						\
+	uint32_t pos;						\
+	uint64_t hash_key_buffer;				\
+								\
+	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
+	hash_key_buffer = *key & f->key_mask;			\
+								\
+	lookup_key8_cmp((&hash_key_buffer), bucket2, pos);	\
+								\
+	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket2->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt2_index] = a;				\
+								\
+	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
+	buckets_mask |= bucket_mask;				\
+	bucket_next = bucket2->next;				\
+	buckets[pkt2_index] = bucket_next;			\
+	keys[pkt2_index] = key;					\
+}
+
+#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
+	buckets_mask, f)					\
+{								\
+	struct rte_bucket_4_8 *bucket, *bucket_next;		\
+	void *a;						\
+	uint64_t pkt_mask, bucket_mask;				\
+	uint64_t *key;						\
+	uint32_t pos;						\
+	uint64_t hash_key_buffer;				\
+								\
+	bucket = buckets[pkt_index];				\
+	key = keys[pkt_index];					\
+	hash_key_buffer = (*key) & f->key_mask;			\
+								\
+	lookup_key8_cmp((&hash_key_buffer), bucket, pos);	\
+								\
+	pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
+	pkts_mask_out |= pkt_mask;				\
+								\
+	a = (void *) &bucket->data[pos * f->entry_size];	\
+	rte_prefetch0(a);					\
+	entries[pkt_index] = a;					\
+								\
+	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
+	buckets_mask |= bucket_mask;				\
+	bucket_next = bucket->next;				\
+	rte_prefetch0(bucket_next);				\
+	buckets[pkt_index] = bucket_next;			\
+	keys[pkt_index] = key;					\
+}
+
+#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
+	pkts, pkts_mask, f)					\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+								\
+	mbuf00 = pkts[pkt00_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+								\
+	mbuf01 = pkts[pkt01_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
+	mbuf00, mbuf01, pkts, pkts_mask, f)			\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	uint32_t key_offset = f->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+								\
+	mbuf00 = pkts[pkt00_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	if (pkts_mask == 0)					\
+		pkt01_index = pkt00_index;			\
+								\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+								\
+	mbuf01 = pkts[pkt01_index];				\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
+{								\
+	uint64_t signature10, signature11;			\
+	uint32_t bucket10_index, bucket11_index;		\
+								\
+	signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
+	bucket10_index = signature10 & (f->n_buckets - 1);	\
+	bucket10 = (struct rte_bucket_4_8 *)			\
+		&f->memory[bucket10_index * f->bucket_size];	\
+	rte_prefetch0(bucket10);				\
+								\
+	signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
+	bucket11_index = signature11 & (f->n_buckets - 1);	\
+	bucket11 = (struct rte_bucket_4_8 *)			\
+		&f->memory[bucket11_index * f->bucket_size];	\
+	rte_prefetch0(bucket11);				\
+}
+
+#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)\
+{								\
+	uint64_t *key10, *key11;				\
+	uint64_t hash_offset_buffer10;				\
+	uint64_t hash_offset_buffer11;				\
+	uint64_t signature10, signature11;			\
+	uint32_t bucket10_index, bucket11_index;		\
+	rte_table_hash_op_hash f_hash = f->f_hash;		\
+	uint64_t seed = f->seed;				\
+	uint32_t key_offset = f->key_offset;			\
+								\
+	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
+	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
+	hash_offset_buffer10 = *key10 & f->key_mask;		\
+	hash_offset_buffer11 = *key11 & f->key_mask;		\
+								\
+	signature10 = f_hash(&hash_offset_buffer10,		\
+		RTE_TABLE_HASH_KEY_SIZE, seed);			\
+	bucket10_index = signature10 & (f->n_buckets - 1);	\
+	bucket10 = (struct rte_bucket_4_8 *)			\
+		&f->memory[bucket10_index * f->bucket_size];	\
+	rte_prefetch0(bucket10);				\
+								\
+	signature11 = f_hash(&hash_offset_buffer11,		\
+		RTE_TABLE_HASH_KEY_SIZE, seed);			\
+	bucket11_index = signature11 & (f->n_buckets - 1);	\
+	bucket11 = (struct rte_bucket_4_8 *)			\
+		&f->memory[bucket11_index * f->bucket_size];	\
+	rte_prefetch0(bucket11);				\
+}
+
+#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
+	bucket20, bucket21, pkts_mask_out, entries, f)		\
+{								\
+	void *a20, *a21;					\
+	uint64_t pkt20_mask, pkt21_mask;			\
+	uint64_t *key20, *key21;				\
+	uint64_t hash_offset_buffer20;				\
+	uint64_t hash_offset_buffer21;				\
+	uint32_t pos20, pos21;					\
+								\
+	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
+	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
+	hash_offset_buffer20 = *key20 & f->key_mask;		\
+	hash_offset_buffer21 = *key21 & f->key_mask;		\
+								\
+	lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\
+	lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\
+								\
+	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
+	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
+	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
+								\
+	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
+	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
+	rte_prefetch0(a20);					\
+	rte_prefetch0(a21);					\
+	entries[pkt20_index] = a20;				\
+	entries[pkt21_index] = a21;				\
+	lru_update(bucket20, pos20);				\
+	lru_update(bucket21, pos21);				\
+}
+
+#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
+	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
+{								\
+	struct rte_bucket_4_8 *bucket20_next, *bucket21_next;	\
+	void *a20, *a21;					\
+	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
+	uint64_t *key20, *key21;				\
+	uint64_t hash_offset_buffer20;				\
+	uint64_t hash_offset_buffer21;				\
+	uint32_t pos20, pos21;					\
+								\
+	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
+	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
+	hash_offset_buffer20 = *key20 & f->key_mask;		\
+	hash_offset_buffer21 = *key21 & f->key_mask;		\
+								\
+	lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\
+	lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\
+								\
+	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
+	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
+	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
+								\
+	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
+	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
+	rte_prefetch0(a20);					\
+	rte_prefetch0(a21);					\
+	entries[pkt20_index] = a20;				\
+	entries[pkt21_index] = a21;				\
+								\
+	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
+	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
+	buckets_mask |= bucket20_mask | bucket21_mask;		\
+	bucket20_next = bucket20->next;				\
+	bucket21_next = bucket21->next;				\
+	buckets[pkt20_index] = bucket20_next;			\
+	buckets[pkt21_index] = bucket21_next;			\
+	keys[pkt20_index] = key20;				\
+	keys[pkt21_index] = key21;				\
+}
+
+static int
+rte_table_hash_lookup_key8_lru(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index,
+			pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_8 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1(mbuf, bucket, f);
+			lookup1_stage2_lru(pkt_index, mbuf, bucket,
+					pkts_mask_out, entries, f);
+		}
+
+		*lookup_hit_mask = pkts_mask_out;
+		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		return 0;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key8_lru() */
+
+static int
+rte_table_hash_lookup_key8_lru_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_8 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1_dosig(mbuf, bucket, f);
+			lookup1_stage2_lru(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, f);
+		}
+
+		*lookup_hit_mask = pkts_mask_out;
+		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+		return 0;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries, f);
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key8_lru_dosig() */
+
+static int
+rte_table_hash_lookup_key8_ext(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0, buckets_mask = 0;
+	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_8 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1(mbuf, bucket, f);
+			lookup1_stage2_ext(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, buckets_mask, buckets,
+				keys, f);
+		}
+
+		goto grind_next_buckets;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries,
+			buckets_mask, buckets, keys, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+	/* Grind next buckets */
+	for ( ; buckets_mask; ) {
+		uint64_t buckets_mask_next = 0;
+
+		for ( ; buckets_mask; ) {
+			uint64_t pkt_mask;
+			uint32_t pkt_index;
+
+			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_mask = 1LLU << pkt_index;
+			buckets_mask &= ~pkt_mask;
+
+			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+				entries, buckets_mask_next, f);
+		}
+
+		buckets_mask = buckets_mask_next;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key8_ext() */
+
+static int
+rte_table_hash_lookup_key8_ext_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *f = (struct rte_table_hash *) table;
+	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+	uint32_t pkt00_index, pkt01_index, pkt10_index;
+	uint32_t pkt11_index, pkt20_index, pkt21_index;
+	uint64_t pkts_mask_out = 0, buckets_mask = 0;
+	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 5 packets */
+	if (__builtin_popcountll(pkts_mask) < 5) {
+		for ( ; pkts_mask; ) {
+			struct rte_bucket_4_8 *bucket;
+			struct rte_mbuf *mbuf;
+			uint32_t pkt_index;
+
+			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
+			lookup1_stage1_dosig(mbuf, bucket, f);
+			lookup1_stage2_ext(pkt_index, mbuf, bucket,
+				pkts_mask_out, entries, buckets_mask,
+				buckets, keys, f);
+		}
+
+		goto grind_next_buckets;
+	}
+
+	/*
+	 * Pipeline fill
+	 *
+	 */
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline feed */
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+		pkts_mask, f);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/*
+	 * Pipeline run
+	 *
+	 */
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		bucket20 = bucket10;
+		bucket21 = bucket11;
+		mbuf20 = mbuf10;
+		mbuf21 = mbuf11;
+		mbuf10 = mbuf00;
+		mbuf11 = mbuf01;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+			mbuf00, mbuf01, pkts, pkts_mask, f);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+			bucket20, bucket21, pkts_mask_out, entries,
+			buckets_mask, buckets, keys, f);
+	}
+
+	/*
+	 * Pipeline flush
+	 *
+	 */
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	mbuf10 = mbuf00;
+	mbuf11 = mbuf01;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+	/* Pipeline feed */
+	bucket20 = bucket10;
+	bucket21 = bucket11;
+	mbuf20 = mbuf10;
+	mbuf21 = mbuf11;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+		bucket20, bucket21, pkts_mask_out, entries,
+		buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+	/* Grind next buckets */
+	for ( ; buckets_mask; ) {
+		uint64_t buckets_mask_next = 0;
+
+		for ( ; buckets_mask; ) {
+			uint64_t pkt_mask;
+			uint32_t pkt_index;
+
+			pkt_index = __builtin_ctzll(buckets_mask);
+			pkt_mask = 1LLU << pkt_index;
+			buckets_mask &= ~pkt_mask;
+
+			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+				entries, buckets_mask_next, f);
+		}
+
+		buckets_mask = buckets_mask_next;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+} /* rte_table_hash_lookup_key8_dosig_ext() */
+
+static int
+rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_hash_key8_lru_ops = {
+	.f_create = rte_table_hash_create_key8_lru,
+	.f_free = rte_table_hash_free_key8_lru,
+	.f_add = rte_table_hash_entry_add_key8_lru,
+	.f_delete = rte_table_hash_entry_delete_key8_lru,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key8_lru,
+	.f_stats = rte_table_hash_key8_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key8_lru_dosig_ops = {
+	.f_create = rte_table_hash_create_key8_lru,
+	.f_free = rte_table_hash_free_key8_lru,
+	.f_add = rte_table_hash_entry_add_key8_lru,
+	.f_delete = rte_table_hash_entry_delete_key8_lru,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key8_lru_dosig,
+	.f_stats = rte_table_hash_key8_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key8_ext_ops = {
+	.f_create = rte_table_hash_create_key8_ext,
+	.f_free = rte_table_hash_free_key8_ext,
+	.f_add = rte_table_hash_entry_add_key8_ext,
+	.f_delete = rte_table_hash_entry_delete_key8_ext,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key8_ext,
+	.f_stats = rte_table_hash_key8_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_key8_ext_dosig_ops = {
+	.f_create = rte_table_hash_create_key8_ext,
+	.f_free = rte_table_hash_free_key8_ext,
+	.f_add = rte_table_hash_entry_add_key8_ext,
+	.f_delete = rte_table_hash_entry_delete_key8_ext,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lookup_key8_ext_dosig,
+	.f_stats = rte_table_hash_key8_stats_read,
+};
diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c
new file mode 100644
index 00000000..407c62ab
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_lru.c
@@ -0,0 +1,1102 @@
+/*-
+ *	 BSD LICENSE
+ *
+ *	 Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *	 All rights reserved.
+ *
+ *	 Redistribution and use in source and binary forms, with or without
+ *	 modification, are permitted provided that the following conditions
+ *	 are met:
+ *
+ *	* Redistributions of source code must retain the above copyright
+ *		 notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above copyright
+ *		 notice, this list of conditions and the following disclaimer in
+ *		 the documentation and/or other materials provided with the
+ *		 distribution.
+ *	* Neither the name of Intel Corporation nor the names of its
+ *		 contributors may be used to endorse or promote products derived
+ *		 from this software without specific prior written permission.
+ *
+ *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+#define KEYS_PER_BUCKET	4
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct bucket {
+	union {
+		struct bucket *next;
+		uint64_t lru_list;
+	};
+	uint16_t sig[KEYS_PER_BUCKET];
+	uint32_t key_pos[KEYS_PER_BUCKET];
+};
+
+struct grinder {
+	struct bucket *bkt;
+	uint64_t sig;
+	uint64_t match;
+	uint64_t match_pos;
+	uint32_t key_index;
+};
+
+struct rte_table_hash {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t key_size;
+	uint32_t entry_size;
+	uint32_t n_keys;
+	uint32_t n_buckets;
+	rte_table_hash_op_hash f_hash;
+	uint64_t seed;
+	uint32_t signature_offset;
+	uint32_t key_offset;
+
+	/* Internal */
+	uint64_t bucket_mask;
+	uint32_t key_size_shl;
+	uint32_t data_size_shl;
+	uint32_t key_stack_tos;
+
+	/* Grinder */
+	struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
+
+	/* Tables */
+	struct bucket *buckets;
+	uint8_t *key_mem;
+	uint8_t *data_mem;
+	uint32_t *key_stack;
+
+	/* Table memory */
+	uint8_t memory[0] __rte_cache_aligned;
+};
+
+static int
+check_params_create(struct rte_table_hash_lru_params *params)
+{
+	uint32_t n_buckets_min;
+
+	/* key_size */
+	if ((params->key_size == 0) ||
+		(!rte_is_power_of_2(params->key_size))) {
+		RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_keys */
+	if ((params->n_keys == 0) ||
+		(!rte_is_power_of_2(params->n_keys))) {
+		RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	/* n_buckets */
+	n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys;
+	if ((params->n_buckets == 0) ||
+		(!rte_is_power_of_2(params->n_keys)) ||
+		(params->n_buckets < n_buckets_min)) {
+		RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	/* f_hash */
+	if (params->f_hash == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void *
+rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_hash_lru_params *p =
+		(struct rte_table_hash_lru_params *) params;
+	struct rte_table_hash *t;
+	uint32_t total_size, table_meta_sz;
+	uint32_t bucket_sz, key_sz, key_stack_sz, data_sz;
+	uint32_t bucket_offset, key_offset, key_stack_offset, data_offset;
+	uint32_t i;
+
+	/* Check input parameters */
+	if ((check_params_create(p) != 0) ||
+		(!rte_is_power_of_2(entry_size)) ||
+		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
+		(sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2))) {
+		return NULL;
+	}
+
+	/* Memory allocation */
+	table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
+	bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
+	key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
+	key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
+	data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
+	total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz +
+		data_sz;
+
+	t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+	if (t == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for hash table\n",
+			__func__, total_size);
+		return NULL;
+	}
+	RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is "
+		"%u bytes\n", __func__, p->key_size, total_size);
+
+	/* Memory initialization */
+	t->key_size = p->key_size;
+	t->entry_size = entry_size;
+	t->n_keys = p->n_keys;
+	t->n_buckets = p->n_buckets;
+	t->f_hash = p->f_hash;
+	t->seed = p->seed;
+	t->signature_offset = p->signature_offset;
+	t->key_offset = p->key_offset;
+
+	/* Internal */
+	t->bucket_mask = t->n_buckets - 1;
+	t->key_size_shl = __builtin_ctzl(p->key_size);
+	t->data_size_shl = __builtin_ctzl(entry_size);
+
+	/* Tables */
+	bucket_offset = 0;
+	key_offset = bucket_offset + bucket_sz;
+	key_stack_offset = key_offset + key_sz;
+	data_offset = key_stack_offset + key_stack_sz;
+
+	t->buckets = (struct bucket *) &t->memory[bucket_offset];
+	t->key_mem = &t->memory[key_offset];
+	t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
+	t->data_mem = &t->memory[data_offset];
+
+	/* Key stack */
+	for (i = 0; i < t->n_keys; i++)
+		t->key_stack[i] = t->n_keys - 1 - i;
+	t->key_stack_tos = t->n_keys;
+
+	/* LRU */
+	for (i = 0; i < t->n_buckets; i++) {
+		struct bucket *bkt = &t->buckets[i];
+
+		lru_init(bkt);
+	}
+
+	return t;
+}
+
+static int
+rte_table_hash_lru_free(void *table)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	/* Check input parameters */
+	if (t == NULL)
+		return -EINVAL;
+
+	rte_free(t);
+	return 0;
+}
+
+static int
+rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
+	int *key_found, void **entry_ptr)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct bucket *bkt;
+	uint64_t sig;
+	uint32_t bkt_index, i;
+
+	sig = t->f_hash(key, t->key_size, t->seed);
+	bkt_index = sig & t->bucket_mask;
+	bkt = &t->buckets[bkt_index];
+	sig = (sig >> 16) | 1LLU;
+
+	/* Key is present in the bucket */
+	for (i = 0; i < KEYS_PER_BUCKET; i++) {
+		uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+		uint32_t bkt_key_index = bkt->key_pos[i];
+		uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+			t->key_size_shl];
+
+		if ((sig == bkt_sig) && (memcmp(key, bkt_key, t->key_size)
+			== 0)) {
+			uint8_t *data = &t->data_mem[bkt_key_index <<
+				t->data_size_shl];
+
+			memcpy(data, entry, t->entry_size);
+			lru_update(bkt, i);
+			*key_found = 1;
+			*entry_ptr = (void *) data;
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	for (i = 0; i < KEYS_PER_BUCKET; i++) {
+		uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+
+		if (bkt_sig == 0) {
+			uint32_t bkt_key_index;
+			uint8_t *bkt_key, *data;
+
+			/* Allocate new key */
+			if (t->key_stack_tos == 0) {
+				/* No keys available */
+				return -ENOSPC;
+			}
+			bkt_key_index = t->key_stack[--t->key_stack_tos];
+
+			/* Install new key */
+			bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl];
+			data = &t->data_mem[bkt_key_index << t->data_size_shl];
+
+			bkt->sig[i] = (uint16_t) sig;
+			bkt->key_pos[i] = bkt_key_index;
+			memcpy(bkt_key, key, t->key_size);
+			memcpy(data, entry, t->entry_size);
+			lru_update(bkt, i);
+
+			*key_found = 0;
+			*entry_ptr = (void *) data;
+			return 0;
+		}
+	}
+
+	/* Bucket full */
+	{
+		uint64_t pos = lru_pos(bkt);
+		uint32_t bkt_key_index = bkt->key_pos[pos];
+		uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+			t->key_size_shl];
+		uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl];
+
+		bkt->sig[pos] = (uint16_t) sig;
+		memcpy(bkt_key, key, t->key_size);
+		memcpy(data, entry, t->entry_size);
+		lru_update(bkt, pos);
+
+		*key_found = 0;
+		*entry_ptr = (void *) data;
+		return 0;
+	}
+}
+
+static int
+rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found,
+	void *entry)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct bucket *bkt;
+	uint64_t sig;
+	uint32_t bkt_index, i;
+
+	sig = t->f_hash(key, t->key_size, t->seed);
+	bkt_index = sig & t->bucket_mask;
+	bkt = &t->buckets[bkt_index];
+	sig = (sig >> 16) | 1LLU;
+
+	/* Key is present in the bucket */
+	for (i = 0; i < KEYS_PER_BUCKET; i++) {
+		uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+		uint32_t bkt_key_index = bkt->key_pos[i];
+		uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+			t->key_size_shl];
+
+		if ((sig == bkt_sig) &&
+			(memcmp(key, bkt_key, t->key_size) == 0)) {
+			uint8_t *data = &t->data_mem[bkt_key_index <<
+				t->data_size_shl];
+
+			bkt->sig[i] = 0;
+			t->key_stack[t->key_stack_tos++] = bkt_key_index;
+			*key_found = 1;
+			memcpy(entry, data, t->entry_size);
+			return 0;
+		}
+	}
+
+	/* Key is not present in the bucket */
+	*key_found = 0;
+	return 0;
+}
+
+static int rte_table_hash_lru_lookup_unoptimized(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries,
+	int dosig)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	uint64_t pkts_mask_out = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
+	for ( ; pkts_mask; ) {
+		struct bucket *bkt;
+		struct rte_mbuf *pkt;
+		uint8_t *key;
+		uint64_t pkt_mask, sig;
+		uint32_t pkt_index, bkt_index, i;
+
+		pkt_index = __builtin_ctzll(pkts_mask);
+		pkt_mask = 1LLU << pkt_index;
+		pkts_mask &= ~pkt_mask;
+
+		pkt = pkts[pkt_index];
+		key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
+		if (dosig)
+			sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
+		else
+			sig = RTE_MBUF_METADATA_UINT32(pkt,
+				t->signature_offset);
+
+		bkt_index = sig & t->bucket_mask;
+		bkt = &t->buckets[bkt_index];
+		sig = (sig >> 16) | 1LLU;
+
+		/* Key is present in the bucket */
+		for (i = 0; i < KEYS_PER_BUCKET; i++) {
+			uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+			uint32_t bkt_key_index = bkt->key_pos[i];
+			uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+				t->key_size_shl];
+
+			if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+				t->key_size) == 0)) {
+				uint8_t *data = &t->data_mem[bkt_key_index <<
+					t->data_size_shl];
+
+				lru_update(bkt, i);
+				pkts_mask_out |= pkt_mask;
+				entries[pkt_index] = (void *) data;
+				break;
+			}
+		}
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return 0;
+}
+
+/***
+*
+* mask = match bitmask
+* match = at least one match
+* match_many = more than one match
+* match_pos = position of first match
+*
+* ----------------------------------------
+* mask		 match	 match_many	  match_pos
+* ----------------------------------------
+* 0000		 0		 0			  00
+* 0001		 1		 0			  00
+* 0010		 1		 0			  01
+* 0011		 1		 1			  00
+* ----------------------------------------
+* 0100		 1		 0			  10
+* 0101		 1		 1			  00
+* 0110		 1		 1			  01
+* 0111		 1		 1			  00
+* ----------------------------------------
+* 1000		 1		 0			  11
+* 1001		 1		 1			  00
+* 1010		 1		 1			  01
+* 1011		 1		 1			  00
+* ----------------------------------------
+* 1100		 1		 1			  10
+* 1101		 1		 1			  00
+* 1110		 1		 1			  01
+* 1111		 1		 1			  00
+* ----------------------------------------
+*
+* match = 1111_1111_1111_1110
+* match_many = 1111_1110_1110_1000
+* match_pos = 0001_0010_0001_0011__0001_0010_0001_0000
+*
+* match = 0xFFFELLU
+* match_many = 0xFEE8LLU
+* match_pos = 0x12131210LLU
+*
+***/
+
+#define LUT_MATCH						0xFFFELLU
+#define LUT_MATCH_MANY						0xFEE8LLU
+#define LUT_MATCH_POS						0x12131210LLU
+
+#define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)\
+{								\
+	uint64_t bucket_sig[4], mask[4], mask_all;		\
+								\
+	bucket_sig[0] = bucket->sig[0];				\
+	bucket_sig[1] = bucket->sig[1];				\
+	bucket_sig[2] = bucket->sig[2];				\
+	bucket_sig[3] = bucket->sig[3];				\
+								\
+	bucket_sig[0] ^= mbuf_sig;				\
+	bucket_sig[1] ^= mbuf_sig;				\
+	bucket_sig[2] ^= mbuf_sig;				\
+	bucket_sig[3] ^= mbuf_sig;				\
+								\
+	mask[0] = 0;						\
+	mask[1] = 0;						\
+	mask[2] = 0;						\
+	mask[3] = 0;						\
+								\
+	if (bucket_sig[0] == 0)					\
+		mask[0] = 1;					\
+	if (bucket_sig[1] == 0)					\
+		mask[1] = 2;					\
+	if (bucket_sig[2] == 0)					\
+		mask[2] = 4;					\
+	if (bucket_sig[3] == 0)					\
+		mask[3] = 8;					\
+								\
+	mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]);	\
+								\
+	match = (LUT_MATCH >> mask_all) & 1;			\
+	match_many = (LUT_MATCH_MANY >> mask_all) & 1;		\
+	match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3;	\
+}
+
+#define lookup_cmp_key(mbuf, key, match_key, f)			\
+{								\
+	uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\
+	uint64_t *bkt_key = (uint64_t *) key;			\
+								\
+	switch (f->key_size) {					\
+	case 8:							\
+	{							\
+		uint64_t xor = pkt_key[0] ^ bkt_key[0];		\
+		match_key = 0;					\
+		if (xor == 0)					\
+			match_key = 1;				\
+	}							\
+	break;							\
+								\
+	case 16:						\
+	{							\
+		uint64_t xor[2], or;				\
+								\
+		xor[0] = pkt_key[0] ^ bkt_key[0];		\
+		xor[1] = pkt_key[1] ^ bkt_key[1];		\
+		or = xor[0] | xor[1];				\
+		match_key = 0;					\
+		if (or == 0)					\
+			match_key = 1;				\
+	}							\
+	break;							\
+								\
+	case 32:						\
+	{							\
+		uint64_t xor[4], or;				\
+								\
+		xor[0] = pkt_key[0] ^ bkt_key[0];		\
+		xor[1] = pkt_key[1] ^ bkt_key[1];		\
+		xor[2] = pkt_key[2] ^ bkt_key[2];		\
+		xor[3] = pkt_key[3] ^ bkt_key[3];		\
+		or = xor[0] | xor[1] | xor[2] | xor[3];		\
+		match_key = 0;					\
+		if (or == 0)					\
+			match_key = 1;				\
+	}							\
+	break;							\
+								\
+	case 64:						\
+	{							\
+		uint64_t xor[8], or;				\
+								\
+		xor[0] = pkt_key[0] ^ bkt_key[0];		\
+		xor[1] = pkt_key[1] ^ bkt_key[1];		\
+		xor[2] = pkt_key[2] ^ bkt_key[2];		\
+		xor[3] = pkt_key[3] ^ bkt_key[3];		\
+		xor[4] = pkt_key[4] ^ bkt_key[4];		\
+		xor[5] = pkt_key[5] ^ bkt_key[5];		\
+		xor[6] = pkt_key[6] ^ bkt_key[6];		\
+		xor[7] = pkt_key[7] ^ bkt_key[7];		\
+		or = xor[0] | xor[1] | xor[2] | xor[3] |	\
+			xor[4] | xor[5] | xor[6] | xor[7];	\
+		match_key = 0;					\
+		if (or == 0)					\
+			match_key = 1;				\
+	}							\
+	break;							\
+								\
+	default:						\
+		match_key = 0;					\
+		if (memcmp(pkt_key, bkt_key, f->key_size) == 0)	\
+			match_key = 1;				\
+	}							\
+}
+
+#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	struct rte_mbuf *mbuf00, *mbuf01;			\
+	uint32_t key_offset = t->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+	mbuf00 = pkts[pkt00_index];				\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+	mbuf01 = pkts[pkt01_index];				\
+								\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
+	pkt01_index)						\
+{								\
+	uint64_t pkt00_mask, pkt01_mask;			\
+	struct rte_mbuf *mbuf00, *mbuf01;			\
+	uint32_t key_offset = t->key_offset;		\
+								\
+	pkt00_index = __builtin_ctzll(pkts_mask);		\
+	pkt00_mask = 1LLU << pkt00_index;			\
+	pkts_mask &= ~pkt00_mask;				\
+	mbuf00 = pkts[pkt00_index];				\
+								\
+	pkt01_index = __builtin_ctzll(pkts_mask);		\
+	if (pkts_mask == 0)					\
+		pkt01_index = pkt00_index;			\
+								\
+	pkt01_mask = 1LLU << pkt01_index;			\
+	pkts_mask &= ~pkt01_mask;				\
+	mbuf01 = pkts[pkt01_index];				\
+								\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
+	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
+}
+
+#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)	\
+{								\
+	struct grinder *g10, *g11;				\
+	uint64_t sig10, sig11, bkt10_index, bkt11_index;	\
+	struct rte_mbuf *mbuf10, *mbuf11;			\
+	struct bucket *bkt10, *bkt11, *buckets = t->buckets;	\
+	uint64_t bucket_mask = t->bucket_mask;			\
+	uint32_t signature_offset = t->signature_offset;	\
+								\
+	mbuf10 = pkts[pkt10_index];				\
+	sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\
+	bkt10_index = sig10 & bucket_mask;			\
+	bkt10 = &buckets[bkt10_index];				\
+								\
+	mbuf11 = pkts[pkt11_index];				\
+	sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\
+	bkt11_index = sig11 & bucket_mask;			\
+	bkt11 = &buckets[bkt11_index];				\
+								\
+	rte_prefetch0(bkt10);					\
+	rte_prefetch0(bkt11);					\
+								\
+	g10 = &g[pkt10_index];					\
+	g10->sig = sig10;					\
+	g10->bkt = bkt10;					\
+								\
+	g11 = &g[pkt11_index];					\
+	g11->sig = sig11;					\
+	g11->bkt = bkt11;					\
+}
+
+#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)\
+{								\
+	struct grinder *g10, *g11;				\
+	uint64_t sig10, sig11, bkt10_index, bkt11_index;	\
+	struct rte_mbuf *mbuf10, *mbuf11;			\
+	struct bucket *bkt10, *bkt11, *buckets = t->buckets;	\
+	uint8_t *key10, *key11;					\
+	uint64_t bucket_mask = t->bucket_mask;			\
+	rte_table_hash_op_hash f_hash = t->f_hash;		\
+	uint64_t seed = t->seed;				\
+	uint32_t key_size = t->key_size;			\
+	uint32_t key_offset = t->key_offset;			\
+								\
+	mbuf10 = pkts[pkt10_index];				\
+	key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);\
+	sig10 = (uint64_t) f_hash(key10, key_size, seed);	\
+	bkt10_index = sig10 & bucket_mask;			\
+	bkt10 = &buckets[bkt10_index];				\
+								\
+	mbuf11 = pkts[pkt11_index];				\
+	key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);\
+	sig11 = (uint64_t) f_hash(key11, key_size, seed);	\
+	bkt11_index = sig11 & bucket_mask;			\
+	bkt11 = &buckets[bkt11_index];				\
+								\
+	rte_prefetch0(bkt10);					\
+	rte_prefetch0(bkt11);					\
+								\
+	g10 = &g[pkt10_index];					\
+	g10->sig = sig10;					\
+	g10->bkt = bkt10;					\
+								\
+	g11 = &g[pkt11_index];					\
+	g11->sig = sig11;					\
+	g11->bkt = bkt11;					\
+}
+
+#define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)\
+{								\
+	struct grinder *g20, *g21;				\
+	uint64_t sig20, sig21;					\
+	struct bucket *bkt20, *bkt21;				\
+	uint8_t *key20, *key21, *key_mem = t->key_mem;		\
+	uint64_t match20, match21, match_many20, match_many21;	\
+	uint64_t match_pos20, match_pos21;			\
+	uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;\
+								\
+	g20 = &g[pkt20_index];					\
+	sig20 = g20->sig;					\
+	bkt20 = g20->bkt;					\
+	sig20 = (sig20 >> 16) | 1LLU;				\
+	lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);\
+	match20 <<= pkt20_index;				\
+	match_many20 <<= pkt20_index;				\
+	key20_index = bkt20->key_pos[match_pos20];		\
+	key20 = &key_mem[key20_index << key_size_shl];		\
+								\
+	g21 = &g[pkt21_index];					\
+	sig21 = g21->sig;					\
+	bkt21 = g21->bkt;					\
+	sig21 = (sig21 >> 16) | 1LLU;				\
+	lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);\
+	match21 <<= pkt21_index;				\
+	match_many21 <<= pkt21_index;				\
+	key21_index = bkt21->key_pos[match_pos21];		\
+	key21 = &key_mem[key21_index << key_size_shl];		\
+								\
+	rte_prefetch0(key20);					\
+	rte_prefetch0(key21);					\
+								\
+	pkts_mask_match_many |= match_many20 | match_many21;	\
+								\
+	g20->match = match20;					\
+	g20->match_pos = match_pos20;				\
+	g20->key_index = key20_index;				\
+								\
+	g21->match = match21;					\
+	g21->match_pos = match_pos21;				\
+	g21->key_index = key21_index;				\
+}
+
+#define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, \
+	entries)						\
+{								\
+	struct grinder *g30, *g31;				\
+	struct rte_mbuf *mbuf30, *mbuf31;			\
+	struct bucket *bkt30, *bkt31;				\
+	uint8_t *key30, *key31, *key_mem = t->key_mem;		\
+	uint8_t *data30, *data31, *data_mem = t->data_mem;	\
+	uint64_t match30, match31, match_pos30, match_pos31;	\
+	uint64_t match_key30, match_key31, match_keys;		\
+	uint32_t key30_index, key31_index;			\
+	uint32_t key_size_shl = t->key_size_shl;		\
+	uint32_t data_size_shl = t->data_size_shl;		\
+								\
+	mbuf30 = pkts[pkt30_index];				\
+	g30 = &g[pkt30_index];					\
+	bkt30 = g30->bkt;					\
+	match30 = g30->match;					\
+	match_pos30 = g30->match_pos;				\
+	key30_index = g30->key_index;				\
+	key30 = &key_mem[key30_index << key_size_shl];		\
+	lookup_cmp_key(mbuf30, key30, match_key30, t);		\
+	match_key30 <<= pkt30_index;				\
+	match_key30 &= match30;					\
+	data30 = &data_mem[key30_index << data_size_shl];	\
+	entries[pkt30_index] = data30;				\
+								\
+	mbuf31 = pkts[pkt31_index];				\
+	g31 = &g[pkt31_index];					\
+	bkt31 = g31->bkt;					\
+	match31 = g31->match;					\
+	match_pos31 = g31->match_pos;				\
+	key31_index = g31->key_index;				\
+	key31 = &key_mem[key31_index << key_size_shl];		\
+	lookup_cmp_key(mbuf31, key31, match_key31, t);		\
+	match_key31 <<= pkt31_index;				\
+	match_key31 &= match31;					\
+	data31 = &data_mem[key31_index << data_size_shl];	\
+	entries[pkt31_index] = data31;				\
+								\
+	rte_prefetch0(data30);					\
+	rte_prefetch0(data31);					\
+								\
+	match_keys = match_key30 | match_key31;			\
+	pkts_mask_out |= match_keys;				\
+								\
+	if (match_key30 == 0)					\
+		match_pos30 = 4;				\
+	lru_update(bkt30, match_pos30);				\
+								\
+	if (match_key31 == 0)					\
+		match_pos31 = 4;				\
+	lru_update(bkt31, match_pos31);				\
+}
+
+/***
+* The lookup function implements a 4-stage pipeline, with each stage processing
+* two different packets. The purpose of pipelined implementation is to hide the
+* latency of prefetching the data structures and loosen the data dependency
+* between instructions.
+*
+*   p00  _______   p10  _______   p20  _______   p30  _______
+* ----->|       |----->|       |----->|       |----->|       |----->
+*       |   0   |      |   1   |      |   2   |      |   3   |
+* ----->|_______|----->|_______|----->|_______|----->|_______|----->
+*   p01            p11            p21            p31
+*
+* The naming convention is:
+*	  pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1
+*
+***/
+static int rte_table_hash_lru_lookup(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct grinder *g = t->grinders;
+	uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
+	uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+	uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+	int status = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 7 packets */
+	if (__builtin_popcountll(pkts_mask) < 7)
+		return rte_table_hash_lru_lookup_unoptimized(table, pkts,
+			pkts_mask, lookup_hit_mask, entries, 0);
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline feed */
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline feed */
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/*
+	* Pipeline run
+	*
+	*/
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		pkt30_index = pkt20_index;
+		pkt31_index = pkt21_index;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
+			pkt00_index, pkt01_index);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2(t, g, pkt20_index, pkt21_index,
+			pkts_mask_match_many);
+
+		/* Pipeline stage 3 */
+		lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
+			pkts_mask_out, entries);
+	}
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Slow path */
+	pkts_mask_match_many &= ~pkts_mask_out;
+	if (pkts_mask_match_many) {
+		uint64_t pkts_mask_out_slow = 0;
+
+		status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
+			pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
+		pkts_mask_out |= pkts_mask_out_slow;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return status;
+}
+
+static int rte_table_hash_lru_lookup_dosig(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+	struct grinder *g = t->grinders;
+	uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
+	uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+	uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+	int status = 0;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in);
+
+	/* Cannot run the pipeline with less than 7 packets */
+	if (__builtin_popcountll(pkts_mask) < 7)
+		return rte_table_hash_lru_lookup_unoptimized(table, pkts,
+			pkts_mask, lookup_hit_mask, entries, 1);
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline feed */
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline feed */
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 0 */
+	lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/*
+	* Pipeline run
+	*
+	*/
+	for ( ; pkts_mask; ) {
+		/* Pipeline feed */
+		pkt30_index = pkt20_index;
+		pkt31_index = pkt21_index;
+		pkt20_index = pkt10_index;
+		pkt21_index = pkt11_index;
+		pkt10_index = pkt00_index;
+		pkt11_index = pkt01_index;
+
+		/* Pipeline stage 0 */
+		lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
+			pkt00_index, pkt01_index);
+
+		/* Pipeline stage 1 */
+		lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+		/* Pipeline stage 2 */
+		lookup2_stage2(t, g, pkt20_index, pkt21_index,
+			pkts_mask_match_many);
+
+		/* Pipeline stage 3 */
+		lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
+			pkts_mask_out, entries);
+	}
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+	pkt10_index = pkt00_index;
+	pkt11_index = pkt01_index;
+
+	/* Pipeline stage 1 */
+	lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+	pkt20_index = pkt10_index;
+	pkt21_index = pkt11_index;
+
+	/* Pipeline stage 2 */
+	lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Pipeline feed */
+	pkt30_index = pkt20_index;
+	pkt31_index = pkt21_index;
+
+	/* Pipeline stage 3 */
+	lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+		entries);
+
+	/* Slow path */
+	pkts_mask_match_many &= ~pkts_mask_out;
+	if (pkts_mask_match_many) {
+		uint64_t pkts_mask_out_slow = 0;
+
+		status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
+			pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
+		pkts_mask_out |= pkts_mask_out_slow;
+	}
+
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
+	return status;
+}
+
+static int
+rte_table_hash_lru_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_hash_lru_ops = {
+	.f_create = rte_table_hash_lru_create,
+	.f_free = rte_table_hash_lru_free,
+	.f_add = rte_table_hash_lru_entry_add,
+	.f_delete = rte_table_hash_lru_entry_delete,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lru_lookup,
+	.f_stats = rte_table_hash_lru_stats_read,
+};
+
+struct rte_table_ops rte_table_hash_lru_dosig_ops = {
+	.f_create = rte_table_hash_lru_create,
+	.f_free = rte_table_hash_lru_free,
+	.f_add = rte_table_hash_lru_entry_add,
+	.f_delete = rte_table_hash_lru_entry_delete,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_hash_lru_lookup_dosig,
+	.f_stats = rte_table_hash_lru_stats_read,
+};
diff --git a/lib/librte_table/rte_table_lpm.c b/lib/librte_table/rte_table_lpm.c
new file mode 100644
index 00000000..cdeb0f5a
--- /dev/null
+++ b/lib/librte_table/rte_table_lpm.c
@@ -0,0 +1,393 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_lpm.h>
+
+#include "rte_table_lpm.h"
+
+#define RTE_TABLE_LPM_MAX_NEXT_HOPS                        256
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_table_lpm {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t entry_size;
+	uint32_t entry_unique_size;
+	uint32_t n_rules;
+	uint32_t offset;
+
+	/* Handle to low-level LPM table */
+	struct rte_lpm *lpm;
+
+	/* Next Hop Table (NHT) */
+	uint32_t nht_users[RTE_TABLE_LPM_MAX_NEXT_HOPS];
+	uint32_t nht[0] __rte_cache_aligned;
+};
+
+static void *
+rte_table_lpm_create(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_lpm_params *p = (struct rte_table_lpm_params *) params;
+	struct rte_table_lpm *lpm;
+	struct rte_lpm_config lpm_config;
+
+	uint32_t total_size, nht_size;
+
+	/* Check input parameters */
+	if (p == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: NULL input parameters\n", __func__);
+		return NULL;
+	}
+	if (p->n_rules == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__);
+		return NULL;
+	}
+	if (p->number_tbl8s == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid number_tbl8s\n", __func__);
+		return NULL;
+	}
+	if (p->entry_unique_size == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n",
+			__func__);
+		return NULL;
+	}
+	if (p->entry_unique_size > entry_size) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n",
+			__func__);
+		return NULL;
+	}
+	if (p->name == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: Table name is NULL\n",
+			__func__);
+		return NULL;
+	}
+	entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t));
+
+	/* Memory allocation */
+	nht_size = RTE_TABLE_LPM_MAX_NEXT_HOPS * entry_size;
+	total_size = sizeof(struct rte_table_lpm) + nht_size;
+	lpm = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE,
+		socket_id);
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for LPM table\n",
+			__func__, total_size);
+		return NULL;
+	}
+
+	/* LPM low-level table creation */
+	lpm_config.max_rules = p->n_rules;
+	lpm_config.number_tbl8s = p->number_tbl8s;
+	lpm_config.flags = p->flags;
+	lpm->lpm = rte_lpm_create(p->name, socket_id, &lpm_config);
+
+	if (lpm->lpm == NULL) {
+		rte_free(lpm);
+		RTE_LOG(ERR, TABLE, "Unable to create low-level LPM table\n");
+		return NULL;
+	}
+
+	/* Memory initialization */
+	lpm->entry_size = entry_size;
+	lpm->entry_unique_size = p->entry_unique_size;
+	lpm->n_rules = p->n_rules;
+	lpm->offset = p->offset;
+
+	return lpm;
+}
+
+static int
+rte_table_lpm_free(void *table)
+{
+	struct rte_table_lpm *lpm = (struct rte_table_lpm *) table;
+
+	/* Check input parameters */
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Free previously allocated resources */
+	rte_lpm_free(lpm->lpm);
+	rte_free(lpm);
+
+	return 0;
+}
+
+static int
+nht_find_free(struct rte_table_lpm *lpm, uint32_t *pos)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) {
+		if (lpm->nht_users[i] == 0) {
+			*pos = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+nht_find_existing(struct rte_table_lpm *lpm, void *entry, uint32_t *pos)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) {
+		uint32_t *nht_entry = &lpm->nht[i * lpm->entry_size];
+
+		if ((lpm->nht_users[i] > 0) && (memcmp(nht_entry, entry,
+			lpm->entry_unique_size) == 0)) {
+			*pos = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_table_lpm_entry_add(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_lpm *lpm = (struct rte_table_lpm *) table;
+	struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key;
+	uint32_t nht_pos, nht_pos0_valid;
+	int status;
+	uint32_t nht_pos0 = 0;
+
+	/* Check input parameters */
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (ip_prefix == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (entry == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if ((ip_prefix->depth == 0) || (ip_prefix->depth > 32)) {
+		RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n",
+			__func__, ip_prefix->depth);
+		return -EINVAL;
+	}
+
+	/* Check if rule is already present in the table */
+	status = rte_lpm_is_rule_present(lpm->lpm, ip_prefix->ip,
+		ip_prefix->depth, &nht_pos0);
+	nht_pos0_valid = status > 0;
+
+	/* Find existing or free NHT entry */
+	if (nht_find_existing(lpm, entry, &nht_pos) == 0) {
+		uint32_t *nht_entry;
+
+		if (nht_find_free(lpm, &nht_pos) == 0) {
+			RTE_LOG(ERR, TABLE, "%s: NHT full\n", __func__);
+			return -1;
+		}
+
+		nht_entry = &lpm->nht[nht_pos * lpm->entry_size];
+		memcpy(nht_entry, entry, lpm->entry_size);
+	}
+
+	/* Add rule to low level LPM table */
+	if (rte_lpm_add(lpm->lpm, ip_prefix->ip, ip_prefix->depth, nht_pos) < 0) {
+		RTE_LOG(ERR, TABLE, "%s: LPM rule add failed\n", __func__);
+		return -1;
+	}
+
+	/* Commit NHT changes */
+	lpm->nht_users[nht_pos]++;
+	lpm->nht_users[nht_pos0] -= nht_pos0_valid;
+
+	*key_found = nht_pos0_valid;
+	*entry_ptr = (void *) &lpm->nht[nht_pos * lpm->entry_size];
+	return 0;
+}
+
+static int
+rte_table_lpm_entry_delete(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_lpm *lpm = (struct rte_table_lpm *) table;
+	struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key;
+	uint32_t nht_pos;
+	int status;
+
+	/* Check input parameters */
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (ip_prefix == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if ((ip_prefix->depth == 0) || (ip_prefix->depth > 32)) {
+		RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__,
+			ip_prefix->depth);
+		return -EINVAL;
+	}
+
+	/* Return if rule is not present in the table */
+	status = rte_lpm_is_rule_present(lpm->lpm, ip_prefix->ip,
+		ip_prefix->depth, &nht_pos);
+	if (status < 0) {
+		RTE_LOG(ERR, TABLE, "%s: LPM algorithmic error\n", __func__);
+		return -1;
+	}
+	if (status == 0) {
+		*key_found = 0;
+		return 0;
+	}
+
+	/* Delete rule from the low-level LPM table */
+	status = rte_lpm_delete(lpm->lpm, ip_prefix->ip, ip_prefix->depth);
+	if (status) {
+		RTE_LOG(ERR, TABLE, "%s: LPM rule delete failed\n", __func__);
+		return -1;
+	}
+
+	/* Commit NHT changes */
+	lpm->nht_users[nht_pos]--;
+
+	*key_found = 1;
+	if (entry)
+		memcpy(entry, &lpm->nht[nht_pos * lpm->entry_size],
+			lpm->entry_size);
+
+	return 0;
+}
+
+static int
+rte_table_lpm_lookup(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_lpm *lpm = (struct rte_table_lpm *) table;
+	uint64_t pkts_out_mask = 0;
+	uint32_t i;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_LPM_STATS_PKTS_IN_ADD(lpm, n_pkts_in);
+
+	pkts_out_mask = 0;
+	for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
+		__builtin_clzll(pkts_mask)); i++) {
+		uint64_t pkt_mask = 1LLU << i;
+
+		if (pkt_mask & pkts_mask) {
+			struct rte_mbuf *pkt = pkts[i];
+			uint32_t ip = rte_bswap32(
+				RTE_MBUF_METADATA_UINT32(pkt, lpm->offset));
+			int status;
+			uint32_t nht_pos;
+
+			status = rte_lpm_lookup(lpm->lpm, ip, &nht_pos);
+			if (status == 0) {
+				pkts_out_mask |= pkt_mask;
+				entries[i] = (void *) &lpm->nht[nht_pos *
+					lpm->entry_size];
+			}
+		}
+	}
+
+	*lookup_hit_mask = pkts_out_mask;
+	RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - __builtin_popcountll(pkts_out_mask));
+	return 0;
+}
+
+static int
+rte_table_lpm_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_lpm *t = (struct rte_table_lpm *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_lpm_ops = {
+	.f_create = rte_table_lpm_create,
+	.f_free = rte_table_lpm_free,
+	.f_add = rte_table_lpm_entry_add,
+	.f_delete = rte_table_lpm_entry_delete,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_lpm_lookup,
+	.f_stats = rte_table_lpm_stats_read,
+};
diff --git a/lib/librte_table/rte_table_lpm.h b/lib/librte_table/rte_table_lpm.h
new file mode 100644
index 00000000..f3033234
--- /dev/null
+++ b/lib/librte_table/rte_table_lpm.h
@@ -0,0 +1,124 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_LPM_H__
+#define __INCLUDE_RTE_TABLE_LPM_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table LPM for IPv4
+ *
+ * This table uses the Longest Prefix Match (LPM) algorithm to uniquely
+ * associate data to lookup keys.
+ *
+ * Use-case: IP routing table. Routes that are added to the table associate a
+ * next hop to an IP prefix. The IP prefix is specified as IP address and depth
+ * and cover for a multitude of lookup keys (i.e. destination IP addresses)
+ * that all share the same data (i.e. next hop). The next hop information
+ * typically contains the output interface ID, the IP address of the next hop
+ * station (which is part of the same IP network the output interface is
+ * connected to) and other flags and counters.
+ *
+ * The LPM primitive only allows associating an 8-bit number (next hop ID) to
+ * an IP prefix, while a routing table can potentially contain thousands of
+ * routes or even more. This means that the same next hop ID (and next hop
+ * information) has to be shared by multiple routes, which makes sense, as
+ * multiple remote networks could be reached through the same next hop.
+ * Therefore, when a route is added or updated, the LPM table has to check
+ * whether the same next hop is already in use before using a new next hop ID
+ * for this route.
+ *
+ * The comparison between different next hops is done for the first
+ * “entry_unique_size” bytes of the next hop information (configurable
+ * parameter), which have to uniquely identify the next hop, therefore the user
+ * has to carefully manage the format of the LPM table entry (i.e.  the next
+ * hop information) so that any next hop data that changes value during
+ * run-time (e.g. counters) is placed outside of this area.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include "rte_table.h"
+
+/** LPM table parameters */
+struct rte_table_lpm_params {
+	/** Table name */
+	const char *name;
+
+	/** Maximum number of LPM rules (i.e. IP routes) */
+	uint32_t n_rules;
+
+	/**< Number of tbl8s to allocate. */
+	uint32_t number_tbl8s;
+
+	/**< This field is currently unused. */
+	int flags;
+
+	/** Number of bytes at the start of the table entry that uniquely
+	identify the entry. Cannot be bigger than table entry size. */
+	uint32_t entry_unique_size;
+
+	/** Byte offset within input packet meta-data where lookup key (i.e.
+	the destination IP address) is located. */
+	uint32_t offset;
+};
+
+/** LPM table rule (i.e. route), specified as IP prefix. While the key used by
+the lookup operation is the destination IP address (read from the input packet
+meta-data), the entry add and entry delete operations work with LPM rules, with
+each rule covering for a multitude of lookup keys (destination IP addresses)
+that share the same data (next hop). */
+struct rte_table_lpm_key {
+	/** IP address */
+	uint32_t ip;
+
+	/** IP address depth. The most significant "depth" bits of the IP
+	address specify the network part of the IP address, while the rest of
+	the bits specify the host part of the address and are ignored for the
+	purpose of route specification. */
+	uint8_t depth;
+};
+
+/** LPM table operations */
+extern struct rte_table_ops rte_table_lpm_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_lpm_ipv6.c b/lib/librte_table/rte_table_lpm_ipv6.c
new file mode 100644
index 00000000..836f4cf6
--- /dev/null
+++ b/lib/librte_table/rte_table_lpm_ipv6.c
@@ -0,0 +1,398 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_lpm6.h>
+
+#include "rte_table_lpm_ipv6.h"
+
+#define RTE_TABLE_LPM_MAX_NEXT_HOPS                        256
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_table_lpm_ipv6 {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t entry_size;
+	uint32_t entry_unique_size;
+	uint32_t n_rules;
+	uint32_t offset;
+
+	/* Handle to low-level LPM table */
+	struct rte_lpm6 *lpm;
+
+	/* Next Hop Table (NHT) */
+	uint32_t nht_users[RTE_TABLE_LPM_MAX_NEXT_HOPS];
+	uint8_t nht[0] __rte_cache_aligned;
+};
+
+static void *
+rte_table_lpm_ipv6_create(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_lpm_ipv6_params *p =
+		(struct rte_table_lpm_ipv6_params *) params;
+	struct rte_table_lpm_ipv6 *lpm;
+	struct rte_lpm6_config lpm6_config;
+	uint32_t total_size, nht_size;
+
+	/* Check input parameters */
+	if (p == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: NULL input parameters\n", __func__);
+		return NULL;
+	}
+	if (p->n_rules == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__);
+		return NULL;
+	}
+	if (p->number_tbl8s == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__);
+		return NULL;
+	}
+	if (p->entry_unique_size == 0) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n",
+			__func__);
+		return NULL;
+	}
+	if (p->entry_unique_size > entry_size) {
+		RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n",
+			__func__);
+		return NULL;
+	}
+	if (p->name == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: Table name is NULL\n",
+			__func__);
+		return NULL;
+	}
+	entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t));
+
+	/* Memory allocation */
+	nht_size = RTE_TABLE_LPM_MAX_NEXT_HOPS * entry_size;
+	total_size = sizeof(struct rte_table_lpm_ipv6) + nht_size;
+	lpm = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE,
+		socket_id);
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for LPM IPv6 table\n",
+			__func__, total_size);
+		return NULL;
+	}
+
+	/* LPM low-level table creation */
+	lpm6_config.max_rules = p->n_rules;
+	lpm6_config.number_tbl8s = p->number_tbl8s;
+	lpm6_config.flags = 0;
+	lpm->lpm = rte_lpm6_create(p->name, socket_id, &lpm6_config);
+	if (lpm->lpm == NULL) {
+		rte_free(lpm);
+		RTE_LOG(ERR, TABLE,
+			"Unable to create low-level LPM IPv6 table\n");
+		return NULL;
+	}
+
+	/* Memory initialization */
+	lpm->entry_size = entry_size;
+	lpm->entry_unique_size = p->entry_unique_size;
+	lpm->n_rules = p->n_rules;
+	lpm->offset = p->offset;
+
+	return lpm;
+}
+
+static int
+rte_table_lpm_ipv6_free(void *table)
+{
+	struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table;
+
+	/* Check input parameters */
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Free previously allocated resources */
+	rte_lpm6_free(lpm->lpm);
+	rte_free(lpm);
+
+	return 0;
+}
+
+static int
+nht_find_free(struct rte_table_lpm_ipv6 *lpm, uint32_t *pos)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) {
+		if (lpm->nht_users[i] == 0) {
+			*pos = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+nht_find_existing(struct rte_table_lpm_ipv6 *lpm, void *entry, uint32_t *pos)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) {
+		uint8_t *nht_entry = &lpm->nht[i * lpm->entry_size];
+
+		if ((lpm->nht_users[i] > 0) && (memcmp(nht_entry, entry,
+			lpm->entry_unique_size) == 0)) {
+			*pos = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+rte_table_lpm_ipv6_entry_add(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr)
+{
+	struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table;
+	struct rte_table_lpm_ipv6_key *ip_prefix =
+		(struct rte_table_lpm_ipv6_key *) key;
+	uint32_t nht_pos, nht_pos0_valid;
+	int status;
+	uint8_t nht_pos0;
+
+	/* Check input parameters */
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (ip_prefix == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if (entry == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if ((ip_prefix->depth == 0) || (ip_prefix->depth > 128)) {
+		RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__,
+			ip_prefix->depth);
+		return -EINVAL;
+	}
+
+	/* Check if rule is already present in the table */
+	status = rte_lpm6_is_rule_present(lpm->lpm, ip_prefix->ip,
+		ip_prefix->depth, &nht_pos0);
+	nht_pos0_valid = status > 0;
+
+	/* Find existing or free NHT entry */
+	if (nht_find_existing(lpm, entry, &nht_pos) == 0) {
+		uint8_t *nht_entry;
+
+		if (nht_find_free(lpm, &nht_pos) == 0) {
+			RTE_LOG(ERR, TABLE, "%s: NHT full\n", __func__);
+			return -1;
+		}
+
+		nht_entry = &lpm->nht[nht_pos * lpm->entry_size];
+		memcpy(nht_entry, entry, lpm->entry_size);
+	}
+
+	/* Add rule to low level LPM table */
+	if (rte_lpm6_add(lpm->lpm, ip_prefix->ip, ip_prefix->depth,
+		(uint8_t) nht_pos) < 0) {
+		RTE_LOG(ERR, TABLE, "%s: LPM IPv6 rule add failed\n", __func__);
+		return -1;
+	}
+
+	/* Commit NHT changes */
+	lpm->nht_users[nht_pos]++;
+	lpm->nht_users[nht_pos0] -= nht_pos0_valid;
+
+	*key_found = nht_pos0_valid;
+	*entry_ptr = (void *) &lpm->nht[nht_pos * lpm->entry_size];
+	return 0;
+}
+
+static int
+rte_table_lpm_ipv6_entry_delete(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry)
+{
+	struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table;
+	struct rte_table_lpm_ipv6_key *ip_prefix =
+		(struct rte_table_lpm_ipv6_key *) key;
+	uint8_t nht_pos;
+	int status;
+
+	/* Check input parameters */
+	if (lpm == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (ip_prefix == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+	if ((ip_prefix->depth == 0) || (ip_prefix->depth > 128)) {
+		RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__,
+			ip_prefix->depth);
+		return -EINVAL;
+	}
+
+	/* Return if rule is not present in the table */
+	status = rte_lpm6_is_rule_present(lpm->lpm, ip_prefix->ip,
+		ip_prefix->depth, &nht_pos);
+	if (status < 0) {
+		RTE_LOG(ERR, TABLE, "%s: LPM IPv6 algorithmic error\n",
+			__func__);
+		return -1;
+	}
+	if (status == 0) {
+		*key_found = 0;
+		return 0;
+	}
+
+	/* Delete rule from the low-level LPM table */
+	status = rte_lpm6_delete(lpm->lpm, ip_prefix->ip, ip_prefix->depth);
+	if (status) {
+		RTE_LOG(ERR, TABLE, "%s: LPM IPv6 rule delete failed\n",
+			__func__);
+		return -1;
+	}
+
+	/* Commit NHT changes */
+	lpm->nht_users[nht_pos]--;
+
+	*key_found = 1;
+	if (entry)
+		memcpy(entry, &lpm->nht[nht_pos * lpm->entry_size],
+			lpm->entry_size);
+
+	return 0;
+}
+
+static int
+rte_table_lpm_ipv6_lookup(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table;
+	uint64_t pkts_out_mask = 0;
+	uint32_t i;
+
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+	RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(lpm, n_pkts_in);
+
+	pkts_out_mask = 0;
+	for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX -
+		__builtin_clzll(pkts_mask)); i++) {
+		uint64_t pkt_mask = 1LLU << i;
+
+		if (pkt_mask & pkts_mask) {
+			struct rte_mbuf *pkt = pkts[i];
+			uint8_t *ip = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+				lpm->offset);
+			int status;
+			uint8_t nht_pos;
+
+			status = rte_lpm6_lookup(lpm->lpm, ip, &nht_pos);
+			if (status == 0) {
+				pkts_out_mask |= pkt_mask;
+				entries[i] = (void *) &lpm->nht[nht_pos *
+					lpm->entry_size];
+			}
+		}
+	}
+
+	*lookup_hit_mask = pkts_out_mask;
+	RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - __builtin_popcountll(pkts_out_mask));
+	return 0;
+}
+
+static int
+rte_table_lpm_ipv6_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_lpm_ipv6 *t = (struct rte_table_lpm_ipv6 *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_lpm_ipv6_ops = {
+	.f_create = rte_table_lpm_ipv6_create,
+	.f_free = rte_table_lpm_ipv6_free,
+	.f_add = rte_table_lpm_ipv6_entry_add,
+	.f_delete = rte_table_lpm_ipv6_entry_delete,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_lpm_ipv6_lookup,
+	.f_stats = rte_table_lpm_ipv6_stats_read,
+};
diff --git a/lib/librte_table/rte_table_lpm_ipv6.h b/lib/librte_table/rte_table_lpm_ipv6.h
new file mode 100644
index 00000000..43aea399
--- /dev/null
+++ b/lib/librte_table/rte_table_lpm_ipv6.h
@@ -0,0 +1,122 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__
+#define __INCLUDE_RTE_TABLE_LPM_IPV6_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table LPM for IPv6
+ *
+ * This table uses the Longest Prefix Match (LPM) algorithm to uniquely
+ * associate data to lookup keys.
+ *
+ * Use-case: IP routing table. Routes that are added to the table associate a
+ * next hop to an IP prefix. The IP prefix is specified as IP address and depth
+ * and cover for a multitude of lookup keys (i.e. destination IP addresses)
+ * that all share the same data (i.e. next hop). The next hop information
+ * typically contains the output interface ID, the IP address of the next hop
+ * station (which is part of the same IP network the output interface is
+ * connected to) and other flags and counters.
+ *
+ * The LPM primitive only allows associating an 8-bit number (next hop ID) to
+ * an IP prefix, while a routing table can potentially contain thousands of
+ * routes or even more. This means that the same next hop ID (and next hop
+ * information) has to be shared by multiple routes, which makes sense, as
+ * multiple remote networks could be reached through the same next hop.
+ * Therefore, when a route is added or updated, the LPM table has to check
+ * whether the same next hop is already in use before using a new next hop ID
+ * for this route.
+ *
+ * The comparison between different next hops is done for the first
+ * “entry_unique_size” bytes of the next hop information (configurable
+ * parameter), which have to uniquely identify the next hop, therefore the user
+ * has to carefully manage the format of the LPM table entry (i.e.  the next
+ * hop information) so that any next hop data that changes value during
+ * run-time (e.g. counters) is placed outside of this area.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include "rte_table.h"
+
+#define RTE_LPM_IPV6_ADDR_SIZE 16
+
+/** LPM table parameters */
+struct rte_table_lpm_ipv6_params {
+	/** Table name */
+	const char *name;
+
+	/** Maximum number of LPM rules (i.e. IP routes) */
+	uint32_t n_rules;
+
+	uint32_t number_tbl8s;
+
+	/** Number of bytes at the start of the table entry that uniquely
+	identify the entry. Cannot be bigger than table entry size. */
+	uint32_t entry_unique_size;
+
+	/** Byte offset within input packet meta-data where lookup key (i.e.
+	the destination IP address) is located. */
+	uint32_t offset;
+};
+
+/** LPM table rule (i.e. route), specified as IP prefix. While the key used by
+the lookup operation is the destination IP address (read from the input packet
+meta-data), the entry add and entry delete operations work with LPM rules, with
+each rule covering for a multitude of lookup keys (destination IP addresses)
+that share the same data (next hop). */
+struct rte_table_lpm_ipv6_key {
+	/** IP address */
+	uint8_t ip[RTE_LPM_IPV6_ADDR_SIZE];
+
+	/** IP address depth. The most significant "depth" bits of the IP
+	address specify the network part of the IP address, while the rest of
+	the bits specify the host part of the address and are ignored for the
+	purpose of route specification. */
+	uint8_t depth;
+};
+
+/** LPM table operations */
+extern struct rte_table_ops rte_table_lpm_ipv6_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_stub.c b/lib/librte_table/rte_table_stub.c
new file mode 100644
index 00000000..691d681a
--- /dev/null
+++ b/lib/librte_table/rte_table_stub.c
@@ -0,0 +1,121 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+
+#include "rte_table_stub.h"
+
+#ifdef RTE_TABLE_STATS_COLLECT
+
+#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val) \
+	table->stats.n_pkts_in += val
+#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val) \
+	table->stats.n_pkts_lookup_miss += val
+
+#else
+
+#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+struct rte_table_stub {
+	struct rte_table_stats stats;
+};
+
+static void *
+rte_table_stub_create(__rte_unused void *params,
+		__rte_unused int socket_id,
+		__rte_unused uint32_t entry_size)
+{
+	struct rte_table_stub *stub;
+	uint32_t size;
+
+	size = sizeof(struct rte_table_stub);
+	stub = rte_zmalloc_socket("TABLE", size, RTE_CACHE_LINE_SIZE,
+		socket_id);
+	if (stub == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for stub table\n",
+			__func__, size);
+		return NULL;
+	}
+
+	return stub;
+}
+
+static int
+rte_table_stub_lookup(
+	__rte_unused void *table,
+	__rte_unused struct rte_mbuf **pkts,
+	__rte_unused uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	__rte_unused void **entries)
+{
+	__rte_unused struct rte_table_stub *stub = (struct rte_table_stub *) table;
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_LPM_STATS_PKTS_IN_ADD(stub, n_pkts_in);
+	*lookup_hit_mask = 0;
+	RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(stub, n_pkts_in);
+
+	return 0;
+}
+
+static int
+rte_table_stub_stats_read(void *table, struct rte_table_stats *stats, int clear)
+{
+	struct rte_table_stub *t = (struct rte_table_stub *) table;
+
+	if (stats != NULL)
+		memcpy(stats, &t->stats, sizeof(t->stats));
+
+	if (clear)
+		memset(&t->stats, 0, sizeof(t->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_stub_ops = {
+	.f_create = rte_table_stub_create,
+	.f_free = NULL,
+	.f_add = NULL,
+	.f_delete = NULL,
+	.f_add_bulk = NULL,
+	.f_delete_bulk = NULL,
+	.f_lookup = rte_table_stub_lookup,
+	.f_stats = rte_table_stub_stats_read,
+};
diff --git a/lib/librte_table/rte_table_stub.h b/lib/librte_table/rte_table_stub.h
new file mode 100644
index 00000000..e75340b0
--- /dev/null
+++ b/lib/librte_table/rte_table_stub.h
@@ -0,0 +1,62 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_STUB_H__
+#define __INCLUDE_RTE_TABLE_STUB_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table Stub
+ *
+ * The stub table lookup operation produces lookup miss for all input packets.
+ *
+ ***/
+
+#include <stdint.h>
+
+#include "rte_table.h"
+
+/** Stub table parameters: NONE */
+
+/** Stub table operations */
+extern struct rte_table_ops rte_table_stub_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map
new file mode 100644
index 00000000..21386984
--- /dev/null
+++ b/lib/librte_table/rte_table_version.map
@@ -0,0 +1,28 @@
+DPDK_2.0 {
+	global:
+
+	rte_table_acl_ops;
+	rte_table_array_ops;
+	rte_table_hash_ext_ops;
+	rte_table_hash_key8_ext_dosig_ops;
+	rte_table_hash_key8_ext_ops;
+	rte_table_hash_key8_lru_dosig_ops;
+	rte_table_hash_key8_lru_ops;
+	rte_table_hash_key16_ext_ops;
+	rte_table_hash_key16_lru_ops;
+	rte_table_hash_key32_ext_ops;
+	rte_table_hash_key32_lru_ops;
+	rte_table_hash_lru_ops;
+	rte_table_lpm_ipv6_ops;
+	rte_table_lpm_ops;
+	rte_table_stub_ops;
+
+	local: *;
+};
+
+DPDK_2.2 {
+	global:
+
+	rte_table_hash_key16_ext_dosig_ops;
+
+} DPDK_2.0;
diff --git a/lib/librte_timer/Makefile b/lib/librte_timer/Makefile
new file mode 100644
index 00000000..2aabef85
--- /dev/null
+++ b/lib/librte_timer/Makefile
@@ -0,0 +1,52 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_timer.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_timer_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_TIMER) := rte_timer.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_TIMER)-include := rte_timer.h
+
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TIMER) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
new file mode 100644
index 00000000..3dcdab58
--- /dev/null
+++ b/lib/librte_timer/rte_timer.c
@@ -0,0 +1,637 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <sys/queue.h>
+
+#include <rte_atomic.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_per_lcore.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_spinlock.h>
+#include <rte_random.h>
+
+#include "rte_timer.h"
+
+LIST_HEAD(rte_timer_list, rte_timer);
+
+struct priv_timer {
+	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
+	rte_spinlock_t list_lock;       /**< lock to protect list access */
+
+	/** per-core variable that true if a timer was updated on this
+	 *  core since last reset of the variable */
+	int updated;
+
+	/** track the current depth of the skiplist */
+	unsigned curr_skiplist_depth;
+
+	unsigned prev_lcore;              /**< used for lcore round robin */
+
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	/** per-lcore statistics */
+	struct rte_timer_debug_stats stats;
+#endif
+} __rte_cache_aligned;
+
+/** per-lcore private info for timers */
+static struct priv_timer priv_timer[RTE_MAX_LCORE];
+
+/* when debug is enabled, store some statistics */
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+#define __TIMER_STAT_ADD(name, n) do {					\
+		unsigned __lcore_id = rte_lcore_id();			\
+		if (__lcore_id < RTE_MAX_LCORE)				\
+			priv_timer[__lcore_id].stats.name += (n);	\
+	} while(0)
+#else
+#define __TIMER_STAT_ADD(name, n) do {} while(0)
+#endif
+
+/* Init the timer library. */
+void
+rte_timer_subsystem_init(void)
+{
+	unsigned lcore_id;
+
+	/* since priv_timer is static, it's zeroed by default, so only init some
+	 * fields.
+	 */
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++) {
+		rte_spinlock_init(&priv_timer[lcore_id].list_lock);
+		priv_timer[lcore_id].prev_lcore = lcore_id;
+	}
+}
+
+/* Initialize the timer handle tim for use */
+void
+rte_timer_init(struct rte_timer *tim)
+{
+	union rte_timer_status status;
+
+	status.state = RTE_TIMER_STOP;
+	status.owner = RTE_TIMER_NO_OWNER;
+	tim->status.u32 = status.u32;
+}
+
+/*
+ * if timer is pending or stopped (or running on the same core than
+ * us), mark timer as configuring, and on success return the previous
+ * status of the timer
+ */
+static int
+timer_set_config_state(struct rte_timer *tim,
+		       union rte_timer_status *ret_prev_status)
+{
+	union rte_timer_status prev_status, status;
+	int success = 0;
+	unsigned lcore_id;
+
+	lcore_id = rte_lcore_id();
+
+	/* wait that the timer is in correct status before update,
+	 * and mark it as being configured */
+	while (success == 0) {
+		prev_status.u32 = tim->status.u32;
+
+		/* timer is running on another core, exit */
+		if (prev_status.state == RTE_TIMER_RUNNING &&
+		    prev_status.owner != (uint16_t)lcore_id)
+			return -1;
+
+		/* timer is being configured on another core */
+		if (prev_status.state == RTE_TIMER_CONFIG)
+			return -1;
+
+		/* here, we know that timer is stopped or pending,
+		 * mark it atomically as being configured */
+		status.state = RTE_TIMER_CONFIG;
+		status.owner = (int16_t)lcore_id;
+		success = rte_atomic32_cmpset(&tim->status.u32,
+					      prev_status.u32,
+					      status.u32);
+	}
+
+	ret_prev_status->u32 = prev_status.u32;
+	return 0;
+}
+
+/*
+ * if timer is pending, mark timer as running
+ */
+static int
+timer_set_running_state(struct rte_timer *tim)
+{
+	union rte_timer_status prev_status, status;
+	unsigned lcore_id = rte_lcore_id();
+	int success = 0;
+
+	/* wait that the timer is in correct status before update,
+	 * and mark it as running */
+	while (success == 0) {
+		prev_status.u32 = tim->status.u32;
+
+		/* timer is not pending anymore */
+		if (prev_status.state != RTE_TIMER_PENDING)
+			return -1;
+
+		/* here, we know that timer is stopped or pending,
+		 * mark it atomically as beeing configured */
+		status.state = RTE_TIMER_RUNNING;
+		status.owner = (int16_t)lcore_id;
+		success = rte_atomic32_cmpset(&tim->status.u32,
+					      prev_status.u32,
+					      status.u32);
+	}
+
+	return 0;
+}
+
+/*
+ * Return a skiplist level for a new entry.
+ * This probabalistically gives a level with p=1/4 that an entry at level n
+ * will also appear at level n+1.
+ */
+static uint32_t
+timer_get_skiplist_level(unsigned curr_depth)
+{
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	static uint32_t i, count = 0;
+	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
+#endif
+
+	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
+	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
+	 * bit position of a (pseudo)random number.
+	 */
+	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
+	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;
+
+	/* limit the levels used to one above our current level, so we don't,
+	 * for instance, have a level 0 and a level 7 without anything between
+	 */
+	if (level > curr_depth)
+		level = curr_depth;
+	if (level >= MAX_SKIPLIST_DEPTH)
+		level = MAX_SKIPLIST_DEPTH-1;
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	count ++;
+	levels[level]++;
+	if (count % 10000 == 0)
+		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
+			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
+#endif
+	return level;
+}
+
+/*
+ * For a given time value, get the entries at each level which
+ * are <= that time value.
+ */
+static void
+timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
+		struct rte_timer **prev)
+{
+	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
+	prev[lvl] = &priv_timer[tim_lcore].pending_head;
+	while(lvl != 0) {
+		lvl--;
+		prev[lvl] = prev[lvl+1];
+		while (prev[lvl]->sl_next[lvl] &&
+				prev[lvl]->sl_next[lvl]->expire <= time_val)
+			prev[lvl] = prev[lvl]->sl_next[lvl];
+	}
+}
+
+/*
+ * Given a timer node in the skiplist, find the previous entries for it at
+ * all skiplist levels.
+ */
+static void
+timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
+		struct rte_timer **prev)
+{
+	int i;
+	/* to get a specific entry in the list, look for just lower than the time
+	 * values, and then increment on each level individually if necessary
+	 */
+	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev);
+	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
+		while (prev[i]->sl_next[i] != NULL &&
+				prev[i]->sl_next[i] != tim &&
+				prev[i]->sl_next[i]->expire <= tim->expire)
+			prev[i] = prev[i]->sl_next[i];
+	}
+}
+
+/*
+ * add in list, lock if needed
+ * timer must be in config state
+ * timer must not be in a list
+ */
+static void
+timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
+{
+	unsigned lcore_id = rte_lcore_id();
+	unsigned lvl;
+	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
+
+	/* if timer needs to be scheduled on another core, we need to
+	 * lock the list; if it is on local core, we need to lock if
+	 * we are not called from rte_timer_manage() */
+	if (tim_lcore != lcore_id || !local_is_locked)
+		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
+
+	/* find where exactly this element goes in the list of elements
+	 * for each depth. */
+	timer_get_prev_entries(tim->expire, tim_lcore, prev);
+
+	/* now assign it a new level and add at that level */
+	const unsigned tim_level = timer_get_skiplist_level(
+			priv_timer[tim_lcore].curr_skiplist_depth);
+	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
+		priv_timer[tim_lcore].curr_skiplist_depth++;
+
+	lvl = tim_level;
+	while (lvl > 0) {
+		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
+		prev[lvl]->sl_next[lvl] = tim;
+		lvl--;
+	}
+	tim->sl_next[0] = prev[0]->sl_next[0];
+	prev[0]->sl_next[0] = tim;
+
+	/* save the lowest list entry into the expire field of the dummy hdr
+	 * NOTE: this is not atomic on 32-bit*/
+	priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
+			pending_head.sl_next[0]->expire;
+
+	if (tim_lcore != lcore_id || !local_is_locked)
+		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
+}
+
+/*
+ * del from list, lock if needed
+ * timer must be in config state
+ * timer must be in a list
+ */
+static void
+timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
+		int local_is_locked)
+{
+	unsigned lcore_id = rte_lcore_id();
+	unsigned prev_owner = prev_status.owner;
+	int i;
+	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
+
+	/* if timer needs is pending another core, we need to lock the
+	 * list; if it is on local core, we need to lock if we are not
+	 * called from rte_timer_manage() */
+	if (prev_owner != lcore_id || !local_is_locked)
+		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);
+
+	/* save the lowest list entry into the expire field of the dummy hdr.
+	 * NOTE: this is not atomic on 32-bit */
+	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
+		priv_timer[prev_owner].pending_head.expire =
+				((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);
+
+	/* adjust pointers from previous entries to point past this */
+	timer_get_prev_entries_for_node(tim, prev_owner, prev);
+	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
+		if (prev[i]->sl_next[i] == tim)
+			prev[i]->sl_next[i] = tim->sl_next[i];
+	}
+
+	/* in case we deleted last entry at a level, adjust down max level */
+	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
+		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
+			priv_timer[prev_owner].curr_skiplist_depth --;
+		else
+			break;
+
+	if (prev_owner != lcore_id || !local_is_locked)
+		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
+}
+
+/* Reset and start the timer associated with the timer handle (private func) */
+static int
+__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
+		  uint64_t period, unsigned tim_lcore,
+		  rte_timer_cb_t fct, void *arg,
+		  int local_is_locked)
+{
+	union rte_timer_status prev_status, status;
+	int ret;
+	unsigned lcore_id = rte_lcore_id();
+
+	/* round robin for tim_lcore */
+	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
+		if (lcore_id < RTE_MAX_LCORE) {
+			/* EAL thread with valid lcore_id */
+			tim_lcore = rte_get_next_lcore(
+				priv_timer[lcore_id].prev_lcore,
+				0, 1);
+			priv_timer[lcore_id].prev_lcore = tim_lcore;
+		} else
+			/* non-EAL thread do not run rte_timer_manage(),
+			 * so schedule the timer on the first enabled lcore. */
+			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
+	}
+
+	/* wait that the timer is in correct status before update,
+	 * and mark it as being configured */
+	ret = timer_set_config_state(tim, &prev_status);
+	if (ret < 0)
+		return -1;
+
+	__TIMER_STAT_ADD(reset, 1);
+	if (prev_status.state == RTE_TIMER_RUNNING &&
+	    lcore_id < RTE_MAX_LCORE) {
+		priv_timer[lcore_id].updated = 1;
+	}
+
+	/* remove it from list */
+	if (prev_status.state == RTE_TIMER_PENDING) {
+		timer_del(tim, prev_status, local_is_locked);
+		__TIMER_STAT_ADD(pending, -1);
+	}
+
+	tim->period = period;
+	tim->expire = expire;
+	tim->f = fct;
+	tim->arg = arg;
+
+	__TIMER_STAT_ADD(pending, 1);
+	timer_add(tim, tim_lcore, local_is_locked);
+
+	/* update state: as we are in CONFIG state, only us can modify
+	 * the state so we don't need to use cmpset() here */
+	rte_wmb();
+	status.state = RTE_TIMER_PENDING;
+	status.owner = (int16_t)tim_lcore;
+	tim->status.u32 = status.u32;
+
+	return 0;
+}
+
+/* Reset and start the timer associated with the timer handle tim */
+int
+rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
+		enum rte_timer_type type, unsigned tim_lcore,
+		rte_timer_cb_t fct, void *arg)
+{
+	uint64_t cur_time = rte_get_timer_cycles();
+	uint64_t period;
+
+	if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
+			!rte_lcore_is_enabled(tim_lcore)))
+		return -1;
+
+	if (type == PERIODICAL)
+		period = ticks;
+	else
+		period = 0;
+
+	return __rte_timer_reset(tim,  cur_time + ticks, period, tim_lcore,
+			  fct, arg, 0);
+}
+
+/* loop until rte_timer_reset() succeed */
+void
+rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
+		     enum rte_timer_type type, unsigned tim_lcore,
+		     rte_timer_cb_t fct, void *arg)
+{
+	while (rte_timer_reset(tim, ticks, type, tim_lcore,
+			       fct, arg) != 0)
+		rte_pause();
+}
+
+/* Stop the timer associated with the timer handle tim */
+int
+rte_timer_stop(struct rte_timer *tim)
+{
+	union rte_timer_status prev_status, status;
+	unsigned lcore_id = rte_lcore_id();
+	int ret;
+
+	/* wait that the timer is in correct status before update,
+	 * and mark it as being configured */
+	ret = timer_set_config_state(tim, &prev_status);
+	if (ret < 0)
+		return -1;
+
+	__TIMER_STAT_ADD(stop, 1);
+	if (prev_status.state == RTE_TIMER_RUNNING &&
+	    lcore_id < RTE_MAX_LCORE) {
+		priv_timer[lcore_id].updated = 1;
+	}
+
+	/* remove it from list */
+	if (prev_status.state == RTE_TIMER_PENDING) {
+		timer_del(tim, prev_status, 0);
+		__TIMER_STAT_ADD(pending, -1);
+	}
+
+	/* mark timer as stopped */
+	rte_wmb();
+	status.state = RTE_TIMER_STOP;
+	status.owner = RTE_TIMER_NO_OWNER;
+	tim->status.u32 = status.u32;
+
+	return 0;
+}
+
+/* loop until rte_timer_stop() succeed */
+void
+rte_timer_stop_sync(struct rte_timer *tim)
+{
+	while (rte_timer_stop(tim) != 0)
+		rte_pause();
+}
+
+/* Test the PENDING status of the timer handle tim */
+int
+rte_timer_pending(struct rte_timer *tim)
+{
+	return tim->status.state == RTE_TIMER_PENDING;
+}
+
+/* must be called periodically, run all timer that expired */
+void rte_timer_manage(void)
+{
+	union rte_timer_status status;
+	struct rte_timer *tim, *next_tim;
+	struct rte_timer *run_first_tim, **pprev;
+	unsigned lcore_id = rte_lcore_id();
+	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
+	uint64_t cur_time;
+	int i, ret;
+
+	/* timer manager only runs on EAL thread with valid lcore_id */
+	assert(lcore_id < RTE_MAX_LCORE);
+
+	__TIMER_STAT_ADD(manage, 1);
+	/* optimize for the case where per-cpu list is empty */
+	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
+		return;
+	cur_time = rte_get_timer_cycles();
+
+#ifdef RTE_ARCH_X86_64
+	/* on 64-bit the value cached in the pending_head.expired will be
+	 * updated atomically, so we can consult that for a quick check here
+	 * outside the lock */
+	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
+		return;
+#endif
+
+	/* browse ordered list, add expired timers in 'expired' list */
+	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
+
+	/* if nothing to do just unlock and return */
+	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
+	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
+		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+		return;
+	}
+
+	/* save start of list of expired timers */
+	tim = priv_timer[lcore_id].pending_head.sl_next[0];
+
+	/* break the existing list at current time point */
+	timer_get_prev_entries(cur_time, lcore_id, prev);
+	for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
+		priv_timer[lcore_id].pending_head.sl_next[i] =
+		    prev[i]->sl_next[i];
+		if (prev[i]->sl_next[i] == NULL)
+			priv_timer[lcore_id].curr_skiplist_depth--;
+		prev[i] ->sl_next[i] = NULL;
+	}
+
+	/* transition run-list from PENDING to RUNNING */
+	run_first_tim = tim;
+	pprev = &run_first_tim;
+
+	for ( ; tim != NULL; tim = next_tim) {
+		next_tim = tim->sl_next[0];
+
+		ret = timer_set_running_state(tim);
+		if (likely(ret == 0)) {
+			pprev = &tim->sl_next[0];
+		} else {
+			/* another core is trying to re-config this one,
+			 * remove it from local expired list and put it
+			 * back on the priv_timer[] skip list */
+			*pprev = next_tim;
+			timer_add(tim, lcore_id, 1);
+		}
+	}
+
+	/* update the next to expire timer value */
+	priv_timer[lcore_id].pending_head.expire =
+	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
+		priv_timer[lcore_id].pending_head.sl_next[0]->expire;
+
+	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+
+	/* now scan expired list and call callbacks */
+	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
+		next_tim = tim->sl_next[0];
+		priv_timer[lcore_id].updated = 0;
+
+		/* execute callback function with list unlocked */
+		tim->f(tim, tim->arg);
+
+		__TIMER_STAT_ADD(pending, -1);
+		/* the timer was stopped or reloaded by the callback
+		 * function, we have nothing to do here */
+		if (priv_timer[lcore_id].updated == 1)
+			continue;
+
+		if (tim->period == 0) {
+			/* remove from done list and mark timer as stopped */
+			status.state = RTE_TIMER_STOP;
+			status.owner = RTE_TIMER_NO_OWNER;
+			rte_wmb();
+			tim->status.u32 = status.u32;
+		}
+		else {
+			/* keep it in list and mark timer as pending */
+			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
+			status.state = RTE_TIMER_PENDING;
+			__TIMER_STAT_ADD(pending, 1);
+			status.owner = (int16_t)lcore_id;
+			rte_wmb();
+			tim->status.u32 = status.u32;
+			__rte_timer_reset(tim, cur_time + tim->period,
+				tim->period, lcore_id, tim->f, tim->arg, 1);
+			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+		}
+	}
+}
+
+/* dump statistics about timers */
+void rte_timer_dump_stats(FILE *f)
+{
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	struct rte_timer_debug_stats sum;
+	unsigned lcore_id;
+
+	memset(&sum, 0, sizeof(sum));
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		sum.reset += priv_timer[lcore_id].stats.reset;
+		sum.stop += priv_timer[lcore_id].stats.stop;
+		sum.manage += priv_timer[lcore_id].stats.manage;
+		sum.pending += priv_timer[lcore_id].stats.pending;
+	}
+	fprintf(f, "Timer statistics:\n");
+	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
+	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
+	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
+	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
+#else
+	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
+#endif
+}
diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h
new file mode 100644
index 00000000..77547c6b
--- /dev/null
+++ b/lib/librte_timer/rte_timer.h
@@ -0,0 +1,335 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_TIMER_H_
+#define _RTE_TIMER_H_
+
+/**
+ * @file
+ RTE Timer
+ *
+ * This library provides a timer service to RTE Data Plane execution
+ * units that allows the execution of callback functions asynchronously.
+ *
+ * - Timers can be periodic or single (one-shot).
+ * - The timers can be loaded from one core and executed on another. This has
+ *   to be specified in the call to rte_timer_reset().
+ * - High precision is possible. NOTE: this depends on the call frequency to
+ *   rte_timer_manage() that check the timer expiration for the local core.
+ * - If not used in an application, for improved performance, it can be
+ *   disabled at compilation time by not calling the rte_timer_manage()
+ *   to improve performance.
+ *
+ * The timer library uses the rte_get_hpet_cycles() function that
+ * uses the HPET, when available, to provide a reliable time reference. [HPET
+ * routines are provided by EAL, which falls back to using the chip TSC (time-
+ * stamp counter) as fallback when HPET is not available]
+ *
+ * This library provides an interface to add, delete and restart a
+ * timer. The API is based on the BSD callout(9) API with a few
+ * differences.
+ *
+ * See the RTE architecture documentation for more information about the
+ * design of this library.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_TIMER_STOP    0 /**< State: timer is stopped. */
+#define RTE_TIMER_PENDING 1 /**< State: timer is scheduled. */
+#define RTE_TIMER_RUNNING 2 /**< State: timer function is running. */
+#define RTE_TIMER_CONFIG  3 /**< State: timer is being configured. */
+
+#define RTE_TIMER_NO_OWNER -2 /**< Timer has no owner. */
+
+/**
+ * Timer type: Periodic or single (one-shot).
+ */
+enum rte_timer_type {
+	SINGLE,
+	PERIODICAL
+};
+
+/**
+ * Timer status: A union of the state (stopped, pending, running,
+ * config) and an owner (the id of the lcore that owns the timer).
+ */
+union rte_timer_status {
+	struct {
+		uint16_t state;  /**< Stop, pending, running, config. */
+		int16_t owner;   /**< The lcore that owns the timer. */
+	};
+	uint32_t u32;            /**< To atomic-set status + owner. */
+};
+
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+/**
+ * A structure that stores the timer statistics (per-lcore).
+ */
+struct rte_timer_debug_stats {
+	uint64_t reset;   /**< Number of success calls to rte_timer_reset(). */
+	uint64_t stop;    /**< Number of success calls to rte_timer_stop(). */
+	uint64_t manage;  /**< Number of calls to rte_timer_manage(). */
+	uint64_t pending; /**< Number of pending/running timers. */
+};
+#endif
+
+struct rte_timer;
+
+/**
+ * Callback function type for timer expiry.
+ */
+typedef void (*rte_timer_cb_t)(struct rte_timer *, void *);
+
+#define MAX_SKIPLIST_DEPTH 10
+
+/**
+ * A structure describing a timer in RTE.
+ */
+struct rte_timer
+{
+	uint64_t expire;       /**< Time when timer expire. */
+	struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH];
+	volatile union rte_timer_status status; /**< Status of timer. */
+	uint64_t period;       /**< Period of timer (0 if not periodic). */
+	rte_timer_cb_t f;      /**< Callback function. */
+	void *arg;             /**< Argument to callback function. */
+};
+
+
+#ifdef __cplusplus
+/**
+ * A C++ static initializer for a timer structure.
+ */
+#define RTE_TIMER_INITIALIZER {             \
+	0,                                      \
+	{NULL},                                 \
+	{{RTE_TIMER_STOP, RTE_TIMER_NO_OWNER}}, \
+	0,                                      \
+	NULL,                                   \
+	NULL,                                   \
+	}
+#else
+/**
+ * A static initializer for a timer structure.
+ */
+#define RTE_TIMER_INITIALIZER {                      \
+		.status = {{                         \
+			.state = RTE_TIMER_STOP,     \
+			.owner = RTE_TIMER_NO_OWNER, \
+		}},                                  \
+	}
+#endif
+
+/**
+ * Initialize the timer library.
+ *
+ * Initializes internal variables (list, locks and so on) for the RTE
+ * timer library.
+ */
+void rte_timer_subsystem_init(void);
+
+/**
+ * Initialize a timer handle.
+ *
+ * The rte_timer_init() function initializes the timer handle *tim*
+ * for use. No operations can be performed on a timer before it is
+ * initialized.
+ *
+ * @param tim
+ *   The timer to initialize.
+ */
+void rte_timer_init(struct rte_timer *tim);
+
+/**
+ * Reset and start the timer associated with the timer handle.
+ *
+ * The rte_timer_reset() function resets and starts the timer
+ * associated with the timer handle *tim*. When the timer expires after
+ * *ticks* HPET cycles, the function specified by *fct* will be called
+ * with the argument *arg* on core *tim_lcore*.
+ *
+ * If the timer associated with the timer handle is already running
+ * (in the RUNNING state), the function will fail. The user has to check
+ * the return value of the function to see if there is a chance that the
+ * timer is in the RUNNING state.
+ *
+ * If the timer is being configured on another core (the CONFIG state),
+ * it will also fail.
+ *
+ * If the timer is pending or stopped, it will be rescheduled with the
+ * new parameters.
+ *
+ * @param tim
+ *   The timer handle.
+ * @param ticks
+ *   The number of cycles (see rte_get_hpet_hz()) before the callback
+ *   function is called.
+ * @param type
+ *   The type can be either:
+ *   - PERIODICAL: The timer is automatically reloaded after execution
+ *     (returns to the PENDING state)
+ *   - SINGLE: The timer is one-shot, that is, the timer goes to a
+ *     STOPPED state after execution.
+ * @param tim_lcore
+ *   The ID of the lcore where the timer callback function has to be
+ *   executed. If tim_lcore is LCORE_ID_ANY, the timer library will
+ *   launch it on a different core for each call (round-robin).
+ * @param fct
+ *   The callback function of the timer.
+ * @param arg
+ *   The user argument of the callback function.
+ * @return
+ *   - 0: Success; the timer is scheduled.
+ *   - (-1): Timer is in the RUNNING or CONFIG state.
+ */
+int rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
+		    enum rte_timer_type type, unsigned tim_lcore,
+		    rte_timer_cb_t fct, void *arg);
+
+
+/**
+ * Loop until rte_timer_reset() succeeds.
+ *
+ * Reset and start the timer associated with the timer handle. Always
+ * succeed. See rte_timer_reset() for details.
+ *
+ * @param tim
+ *   The timer handle.
+ * @param ticks
+ *   The number of cycles (see rte_get_hpet_hz()) before the callback
+ *   function is called.
+ * @param type
+ *   The type can be either:
+ *   - PERIODICAL: The timer is automatically reloaded after execution
+ *     (returns to the PENDING state)
+ *   - SINGLE: The timer is one-shot, that is, the timer goes to a
+ *     STOPPED state after execution.
+ * @param tim_lcore
+ *   The ID of the lcore where the timer callback function has to be
+ *   executed. If tim_lcore is LCORE_ID_ANY, the timer library will
+ *   launch it on a different core for each call (round-robin).
+ * @param fct
+ *   The callback function of the timer.
+ * @param arg
+ *   The user argument of the callback function.
+ */
+void
+rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
+		     enum rte_timer_type type, unsigned tim_lcore,
+		     rte_timer_cb_t fct, void *arg);
+
+/**
+ * Stop a timer.
+ *
+ * The rte_timer_stop() function stops the timer associated with the
+ * timer handle *tim*. It may fail if the timer is currently running or
+ * being configured.
+ *
+ * If the timer is pending or stopped (for instance, already expired),
+ * the function will succeed. The timer handle tim must have been
+ * initialized using rte_timer_init(), otherwise, undefined behavior
+ * will occur.
+ *
+ * This function can be called safely from a timer callback. If it
+ * succeeds, the timer is not referenced anymore by the timer library
+ * and the timer structure can be freed (even in the callback
+ * function).
+ *
+ * @param tim
+ *   The timer handle.
+ * @return
+ *   - 0: Success; the timer is stopped.
+ *   - (-1): The timer is in the RUNNING or CONFIG state.
+ */
+int rte_timer_stop(struct rte_timer *tim);
+
+
+/**
+ * Loop until rte_timer_stop() succeeds.
+ *
+ * After a call to this function, the timer identified by *tim* is
+ * stopped. See rte_timer_stop() for details.
+ *
+ * @param tim
+ *   The timer handle.
+ */
+void rte_timer_stop_sync(struct rte_timer *tim);
+
+/**
+ * Test if a timer is pending.
+ *
+ * The rte_timer_pending() function tests the PENDING status
+ * of the timer handle *tim*. A PENDING timer is one that has been
+ * scheduled and whose function has not yet been called.
+ *
+ * @param tim
+ *   The timer handle.
+ * @return
+ *   - 0: The timer is not pending.
+ *   - 1: The timer is pending.
+ */
+int rte_timer_pending(struct rte_timer *tim);
+
+/**
+ * Manage the timer list and execute callback functions.
+ *
+ * This function must be called periodically from EAL lcores
+ * main_loop(). It browses the list of pending timers and runs all
+ * timers that are expired.
+ *
+ * The precision of the timer depends on the call frequency of this
+ * function. However, the more often the function is called, the more
+ * CPU resources it will use.
+ */
+void rte_timer_manage(void);
+
+/**
+ * Dump statistics about timers.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_timer_dump_stats(FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TIMER_H_ */
diff --git a/lib/librte_timer/rte_timer_version.map b/lib/librte_timer/rte_timer_version.map
new file mode 100644
index 00000000..9b2e4b86
--- /dev/null
+++ b/lib/librte_timer/rte_timer_version.map
@@ -0,0 +1,15 @@
+DPDK_2.0 {
+	global:
+
+	rte_timer_dump_stats;
+	rte_timer_init;
+	rte_timer_manage;
+	rte_timer_pending;
+	rte_timer_reset;
+	rte_timer_reset_sync;
+	rte_timer_stop;
+	rte_timer_stop_sync;
+	rte_timer_subsystem_init;
+
+	local: *;
+};
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
new file mode 100644
index 00000000..e33ff53e
--- /dev/null
+++ b/lib/librte_vhost/Makefile
@@ -0,0 +1,71 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_vhost.a
+
+EXPORT_MAP := rte_vhost_version.map
+
+LIBABIVER := 2
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
+CFLAGS += -I vhost_user
+LDLIBS += -lpthread
+else
+CFLAGS += -I vhost_cuse
+LDLIBS += -lfuse
+endif
+
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
+LDLIBS += -lnuma
+endif
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
+ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c
+else
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
+endif
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
+
+# dependencies
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_net
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile
new file mode 100644
index 00000000..3140e8bf
--- /dev/null
+++ b/lib/librte_vhost/eventfd_link/Makefile
@@ -0,0 +1,41 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+
+obj-m += eventfd_link.o
+
+
+all:
+	make -C $(RTE_KERNELDIR) M=$(PWD) modules
+
+clean:
+	make -C $(RTE_KERNELDIR) M=$(PWD) clean
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
new file mode 100644
index 00000000..4b05b5a8
--- /dev/null
+++ b/lib/librte_vhost/eventfd_link/eventfd_link.c
@@ -0,0 +1,277 @@
+/*-
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ */
+
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/syscalls.h>
+
+#include "eventfd_link.h"
+
+
+/*
+ * get_files_struct is copied from fs/file.c
+ */
+struct files_struct *
+get_files_struct(struct task_struct *task)
+{
+	struct files_struct *files;
+
+	task_lock(task);
+	files = task->files;
+	if (files)
+		atomic_inc(&files->count);
+	task_unlock(task);
+
+	return files;
+}
+
+/*
+ * put_files_struct is extracted from fs/file.c
+ */
+void
+put_files_struct(struct files_struct *files)
+{
+	if (atomic_dec_and_test(&files->count))
+		BUG();
+}
+
+static struct file *
+fget_from_files(struct files_struct *files, unsigned fd)
+{
+	struct file *file;
+
+	rcu_read_lock();
+	file = fcheck_files(files, fd);
+	if (file) {
+		if (file->f_mode & FMODE_PATH ||
+			!atomic_long_inc_not_zero(&file->f_count)) {
+
+			file = NULL;
+		}
+	}
+	rcu_read_unlock();
+
+	return file;
+}
+
+static long
+eventfd_link_ioctl_copy2(unsigned long arg)
+{
+	void __user *argp = (void __user *) arg;
+	struct task_struct *task_target = NULL;
+	struct file *file;
+	struct files_struct *files;
+	struct eventfd_copy2 eventfd_copy2;
+	long ret = -EFAULT;
+
+	if (copy_from_user(&eventfd_copy2, argp, sizeof(struct eventfd_copy2)))
+		goto out;
+
+	/*
+	 * Find the task struct for the target pid
+	 */
+	ret = -ESRCH;
+
+	task_target =
+		get_pid_task(find_vpid(eventfd_copy2.pid), PIDTYPE_PID);
+	if (task_target == NULL) {
+		pr_info("Unable to find pid %d\n", eventfd_copy2.pid);
+		goto out;
+	}
+
+	ret = -ESTALE;
+	files = get_files_struct(task_target);
+	if (files == NULL) {
+		pr_info("Failed to get target files struct\n");
+		goto out_task;
+	}
+
+	ret = -EBADF;
+	file = fget_from_files(files, eventfd_copy2.fd);
+	put_files_struct(files);
+
+	if (file == NULL) {
+		pr_info("Failed to get fd %d from target\n", eventfd_copy2.fd);
+		goto out_task;
+	}
+
+	/*
+	 * Install the file struct from the target process into the
+	 * newly allocated file desciptor of the source process.
+	 */
+	ret = get_unused_fd_flags(eventfd_copy2.flags);
+	if (ret < 0) {
+		fput(file);
+		goto out_task;
+	}
+	fd_install(ret, file);
+
+out_task:
+	put_task_struct(task_target);
+out:
+	return ret;
+}
+
+static long
+eventfd_link_ioctl_copy(unsigned long arg)
+{
+	void __user *argp = (void __user *) arg;
+	struct task_struct *task_target = NULL;
+	struct file *file;
+	struct files_struct *files;
+	struct fdtable *fdt;
+	struct eventfd_copy eventfd_copy;
+	long ret = -EFAULT;
+
+	if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
+		goto out;
+
+	/*
+	 * Find the task struct for the target pid
+	 */
+	ret = -ESRCH;
+
+	task_target =
+		get_pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
+	if (task_target == NULL) {
+		pr_info("Unable to find pid %d\n", eventfd_copy.target_pid);
+		goto out;
+	}
+
+	ret = -ESTALE;
+	files = get_files_struct(current);
+	if (files == NULL) {
+		pr_info("Failed to get current files struct\n");
+		goto out_task;
+	}
+
+	ret = -EBADF;
+	file = fget_from_files(files, eventfd_copy.source_fd);
+
+	if (file == NULL) {
+		pr_info("Failed to get fd %d from source\n",
+			eventfd_copy.source_fd);
+		put_files_struct(files);
+		goto out_task;
+	}
+
+	/*
+	 * Release the existing eventfd in the source process
+	 */
+	spin_lock(&files->file_lock);
+	fput(file);
+	filp_close(file, files);
+	fdt = files_fdtable(files);
+	fdt->fd[eventfd_copy.source_fd] = NULL;
+	spin_unlock(&files->file_lock);
+
+	put_files_struct(files);
+
+	/*
+	 * Find the file struct associated with the target fd.
+	 */
+
+	ret = -ESTALE;
+	files = get_files_struct(task_target);
+	if (files == NULL) {
+		pr_info("Failed to get target files struct\n");
+		goto out_task;
+	}
+
+	ret = -EBADF;
+	file = fget_from_files(files, eventfd_copy.target_fd);
+	put_files_struct(files);
+
+	if (file == NULL) {
+		pr_info("Failed to get fd %d from target\n",
+			eventfd_copy.target_fd);
+		goto out_task;
+	}
+
+	/*
+	 * Install the file struct from the target process into the
+	 * file desciptor of the source process,
+	 */
+
+	fd_install(eventfd_copy.source_fd, file);
+	ret = 0;
+
+out_task:
+	put_task_struct(task_target);
+out:
+	return ret;
+}
+
+static long
+eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
+{
+	long ret = -ENOIOCTLCMD;
+
+	switch (ioctl) {
+	case EVENTFD_COPY:
+		ret = eventfd_link_ioctl_copy(arg);
+		break;
+	case EVENTFD_COPY2:
+		ret = eventfd_link_ioctl_copy2(arg);
+		break;
+	}
+
+	return ret;
+}
+
+static const struct file_operations eventfd_link_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = eventfd_link_ioctl,
+};
+
+
+static struct miscdevice eventfd_link_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "eventfd-link",
+	.fops = &eventfd_link_fops,
+};
+
+static int __init
+eventfd_link_init(void)
+{
+	return misc_register(&eventfd_link_misc);
+}
+
+module_init(eventfd_link_init);
+
+static void __exit
+eventfd_link_exit(void)
+{
+	misc_deregister(&eventfd_link_misc);
+}
+
+module_exit(eventfd_link_exit);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Anthony Fee");
+MODULE_DESCRIPTION("Link eventfd");
+MODULE_ALIAS("devname:eventfd-link");
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
new file mode 100644
index 00000000..5ebc20b8
--- /dev/null
+++ b/lib/librte_vhost/eventfd_link/eventfd_link.h
@@ -0,0 +1,94 @@
+/*-
+ *  This file is provided under a dual BSD/GPLv2 license.  When using or
+ *  redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *   The full GNU General Public License is included in this distribution
+ *   in the file called LICENSE.GPL.
+ *
+ *   Contact Information:
+ *   Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *   Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *   Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ *   Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _EVENTFD_LINK_H_
+#define _EVENTFD_LINK_H_
+
+/*
+ * arguements for the EVENTFD_COPY ioctl
+ */
+struct eventfd_copy {
+	unsigned target_fd; /* fd in the target pid */
+	unsigned source_fd; /* fd in the calling pid */
+	pid_t target_pid; /* pid of the target pid */
+};
+
+/*
+ * ioctl to copy an fd entry in calling process to an fd in a target process
+ * NOTE: this one should be
+ * #define EVENTFD_COPY _IOWR('D', 1, struct eventfd_copy) actually
+ */
+#define EVENTFD_COPY 1
+
+/*
+ * arguments for the EVENTFD_COPY2 ioctl
+ */
+struct eventfd_copy2 {
+	unsigned fd; /* fd to steal */
+	pid_t pid; /* pid of the process to steal from */
+	unsigned flags; /* flags to allocate new fd with */
+};
+
+/*
+ * ioctl to copy an fd entry from the target process into newly allocated
+ * fd in the calling process
+ */
+#define EVENTFD_COPY2 _IOW('D', 2, struct eventfd_copy2)
+
+#endif /* _EVENTFD_LINK_H_ */
diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
new file mode 100755
index 00000000..e6a2cc9d
--- /dev/null
+++ b/lib/librte_vhost/libvirt/qemu-wrap.py
@@ -0,0 +1,387 @@
+#!/usr/bin/python
+#/*
+# *   BSD LICENSE
+# *
+# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# *   All rights reserved.
+# *
+# *   Redistribution and use in source and binary forms, with or without
+# *   modification, are permitted provided that the following conditions
+# *   are met:
+# *
+# *     * Redistributions of source code must retain the above copyright
+# *       notice, this list of conditions and the following disclaimer.
+# *     * Redistributions in binary form must reproduce the above copyright
+# *       notice, this list of conditions and the following disclaimer in
+# *       the documentation and/or other materials provided with the
+# *       distribution.
+# *     * Neither the name of Intel Corporation nor the names of its
+# *       contributors may be used to endorse or promote products derived
+# *       from this software without specific prior written permission.
+# *
+# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# */
+
+#####################################################################
+# This script is designed to modify the call to the QEMU emulator
+# to support userspace vhost when starting a guest machine through
+# libvirt with vhost enabled. The steps to enable this are as follows
+# and should be run as root:
+#
+# 1. Place this script in a libvirtd's binary search PATH ($PATH)
+#    A good location would be in the same directory that the QEMU
+#    binary is located
+#
+# 2. Ensure that the script has the same owner/group and file
+#    permissions as the QEMU binary
+#
+# 3. Update the VM xml file using "virsh edit VM.xml"
+#
+#    3.a) Set the VM to use the launch script
+#
+#	Set the emulator path contained in the
+#		<emulator><emulator/> tags
+#
+#	e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
+#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>
+#
+#	 3.b) Set the VM's device's to use vhost-net offload
+#
+#		<interface type="network">
+#	<model type="virtio"/>
+#	<driver name="vhost"/>
+#		<interface/>
+#
+# 4. Enable libvirt to access our userpace device file by adding it to
+#    controllers cgroup for libvirtd using the following steps
+#
+#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
+#         1) cgroup_controllers = [ ... "devices", ... ]
+#		  2) clear_emulator_capabilities = 0
+#         3) user = "root"
+#         4) group = "root"
+#         5) cgroup_device_acl = [
+#                "/dev/null", "/dev/full", "/dev/zero",
+#                "/dev/random", "/dev/urandom",
+#                "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
+#                "/dev/rtc", "/dev/hpet", "/dev/net/tun",
+#                "/dev/<devbase-name>",
+#                "/dev/hugepages",
+#            ]
+#
+#   4.b) Disable SELinux or set to permissive mode
+#
+#   4.c) Mount cgroup device controller
+#        "mkdir /dev/cgroup"
+#        "mount -t cgroup none /dev/cgroup -o devices"
+#
+#   4.d) Set hugetlbfs_mount variable - ( Optional )
+#        VMs using userspace vhost must use hugepage backed
+#        memory. This can be enabled in the libvirt XML
+#        config by adding a memory backing section to the
+#        XML config e.g.
+#             <memoryBacking>
+#             <hugepages/>
+#             </memoryBacking>
+#        This memory backing section should be added after the
+#        <memory> and <currentMemory> sections. This will add
+#        flags "-mem-prealloc -mem-path <path>" to the QEMU
+#        command line. The hugetlbfs_mount variable can be used
+#        to override the default <path> passed through by libvirt.
+#
+#        if "-mem-prealloc" or "-mem-path <path>" are not passed
+#        through and a vhost device is detected then these options will
+#        be automatically added by this script. This script will detect
+#        the system hugetlbfs mount point to be used for <path>. The
+#        default <path> for this script can be overidden by the
+#        hugetlbfs_dir variable in the configuration section of this script.
+#
+#
+#   4.e) Restart the libvirtd system process
+#        e.g. on Fedora "systemctl restart libvirtd.service"
+#
+#
+#   4.f) Edit the Configuration Parameters section of this script
+#        to point to the correct emulator location and set any
+#        addition options
+#
+# The script modifies the libvirtd Qemu call by modifying/adding
+# options based on the configuration parameters below.
+# NOTE:
+#     emul_path and us_vhost_path must be set
+#     All other parameters are optional
+#####################################################################
+
+
+#############################################
+# Configuration Parameters
+#############################################
+#Path to QEMU binary
+emul_path = "/usr/local/bin/qemu-system-x86_64"
+
+#Path to userspace vhost device file
+# This filename should match the --dev-basename parameters of
+# the command used to launch the userspace vhost sample application e.g.
+# if the sample app lauch command is:
+#    ./build/vhost-switch ..... --dev-basename usvhost
+# then this variable should be set to:
+#   us_vhost_path = "/dev/usvhost"
+us_vhost_path = "/dev/usvhost"
+
+#List of additional user defined emulation options. These options will
+#be added to all Qemu calls
+emul_opts_user = []
+
+#List of additional user defined emulation options for vhost only.
+#These options will only be added to vhost enabled guests
+emul_opts_user_vhost = []
+
+#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
+# Set this variable to one to enable this option for all VMs
+use_huge_all = 0
+
+#Instead of autodetecting, override the hugetlbfs directory by setting
+#this variable
+hugetlbfs_dir = ""
+
+#############################################
+
+
+#############################################
+# ****** Do Not Modify Below this Line ******
+#############################################
+
+import sys, os, subprocess
+import time
+import signal
+
+
+#List of open userspace vhost file descriptors
+fd_list = []
+
+#additional virtio device flags when using userspace vhost
+vhost_flags = [ "csum=off",
+                "gso=off",
+                "guest_tso4=off",
+                "guest_tso6=off",
+                "guest_ecn=off"
+              ]
+
+#String of the path to the Qemu process pid
+qemu_pid = "/tmp/%d-qemu.pid" % os.getpid()
+
+#############################################
+# Signal haldler to kill Qemu subprocess
+#############################################
+def kill_qemu_process(signum, stack):
+    pidfile = open(qemu_pid, 'r')
+    pid = int(pidfile.read())
+    os.killpg(pid, signal.SIGTERM)
+    pidfile.close()
+
+
+#############################################
+# Find the system hugefile mount point.
+# Note:
+# if multiple hugetlbfs mount points exist
+# then the first one found will be used
+#############################################
+def find_huge_mount():
+
+    if (len(hugetlbfs_dir)):
+        return hugetlbfs_dir
+
+    huge_mount = ""
+
+    if (os.access("/proc/mounts", os.F_OK)):
+        f = open("/proc/mounts", "r")
+        line = f.readline()
+        while line:
+            line_split = line.split(" ")
+            if line_split[2] == 'hugetlbfs':
+                huge_mount = line_split[1]
+                break
+            line = f.readline()
+    else:
+        print "/proc/mounts not found"
+        exit (1)
+
+    f.close
+    if len(huge_mount) == 0:
+        print "Failed to find hugetlbfs mount point"
+        exit (1)
+
+    return huge_mount
+
+
+#############################################
+# Get a userspace Vhost file descriptor
+#############################################
+def get_vhost_fd():
+
+    if (os.access(us_vhost_path, os.F_OK)):
+        fd = os.open( us_vhost_path, os.O_RDWR)
+    else:
+        print ("US-Vhost file %s not found" %us_vhost_path)
+        exit (1)
+
+    return fd
+
+
+#############################################
+# Check for vhostfd. if found then replace
+# with our own vhost fd and append any vhost
+# flags onto the end
+#############################################
+def modify_netdev_arg(arg):
+
+    global fd_list
+    vhost_in_use = 0
+    s = ''
+    new_opts = []
+    netdev_opts = arg.split(",")
+
+    for opt in netdev_opts:
+        #check if vhost is used
+        if "vhost" == opt[:5]:
+            vhost_in_use = 1
+        else:
+            new_opts.append(opt)
+
+    #if using vhost append vhost options
+    if vhost_in_use == 1:
+        #append vhost on option
+        new_opts.append('vhost=on')
+        #append vhostfd ption
+        new_fd = get_vhost_fd()
+        new_opts.append('vhostfd=' + str(new_fd))
+        fd_list.append(new_fd)
+
+    #concatenate all options
+    for opt in new_opts:
+        if len(s) > 0:
+			s+=','
+
+        s+=opt
+
+    return s
+
+
+#############################################
+# Main
+#############################################
+def main():
+
+    global fd_list
+    global vhost_in_use
+    new_args = []
+    num_cmd_args = len(sys.argv)
+    emul_call = ''
+    mem_prealloc_set = 0
+    mem_path_set = 0
+    num = 0;
+
+    #parse the parameters
+    while (num < num_cmd_args):
+        arg = sys.argv[num]
+
+	#Check netdev +1 parameter for vhostfd
+        if arg == '-netdev':
+            num_vhost_devs = len(fd_list)
+            new_args.append(arg)
+
+            num+=1
+            arg = sys.argv[num]
+            mod_arg = modify_netdev_arg(arg)
+            new_args.append(mod_arg)
+
+            #append vhost flags if this is a vhost device
+            # and -device is the next arg
+            # i.e -device -opt1,-opt2,...,-opt3,%vhost
+            if (num_vhost_devs < len(fd_list)):
+                num+=1
+                arg = sys.argv[num]
+                if arg == '-device':
+                    new_args.append(arg)
+                    num+=1
+                    new_arg = sys.argv[num]
+                    for flag in vhost_flags:
+                        new_arg = ''.join([new_arg,',',flag])
+                    new_args.append(new_arg)
+                else:
+                    new_args.append(arg)
+        elif arg == '-mem-prealloc':
+            mem_prealloc_set = 1
+            new_args.append(arg)
+        elif arg == '-mem-path':
+            mem_path_set = 1
+            new_args.append(arg)
+
+        else:
+            new_args.append(arg)
+
+        num+=1
+
+    #Set Qemu binary location
+    emul_call+=emul_path
+    emul_call+=" "
+
+    #Add prealloc mem options if using vhost and not already added
+    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
+        emul_call += "-mem-prealloc "
+
+    #Add mempath mem options if using vhost and not already added
+    if ((len(fd_list) > 0) and (mem_path_set == 0)):
+        #Detect and add hugetlbfs mount point
+        mp = find_huge_mount()
+        mp = "".join(["-mem-path ", mp])
+        emul_call += mp
+        emul_call += " "
+
+    #add user options
+    for opt in emul_opts_user:
+        emul_call += opt
+        emul_call += " "
+
+    #Add add user vhost only options
+    if len(fd_list) > 0:
+        for opt in emul_opts_user_vhost:
+            emul_call += opt
+            emul_call += " "
+
+    #Add updated libvirt options
+    iter_args = iter(new_args)
+    #skip 1st arg i.e. call to this script
+    next(iter_args)
+    for arg in iter_args:
+        emul_call+=str(arg)
+        emul_call+= " "
+
+    emul_call += "-pidfile %s " % qemu_pid
+    #Call QEMU
+    process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid)
+
+    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT]:
+        signal.signal(sig, kill_qemu_process)
+
+    process.wait()
+
+    #Close usvhost files
+    for fd in fd_list:
+        os.close(fd)
+    #Cleanup temporary files
+    if os.access(qemu_pid, os.F_OK):
+        os.remove(qemu_pid)
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
new file mode 100644
index 00000000..3d8709e5
--- /dev/null
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -0,0 +1,22 @@
+DPDK_2.0 {
+	global:
+
+	rte_vhost_dequeue_burst;
+	rte_vhost_driver_callback_register;
+	rte_vhost_driver_register;
+	rte_vhost_driver_session_start;
+	rte_vhost_enable_guest_notification;
+	rte_vhost_enqueue_burst;
+	rte_vhost_feature_disable;
+	rte_vhost_feature_enable;
+	rte_vhost_feature_get;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_vhost_driver_unregister;
+
+} DPDK_2.0;
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
new file mode 100644
index 00000000..600b20b4
--- /dev/null
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -0,0 +1,286 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_H_
+#define _VIRTIO_NET_H_
+
+/**
+ * @file
+ * Interface to vhost net
+ */
+
+#include <stdint.h>
+#include <linux/vhost.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_ether.h>
+
+struct rte_mbuf;
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+/* Used to indicate that the device is running on a data core */
+#define VIRTIO_DEV_RUNNING 1
+
+/* Backend value set by guest. */
+#define VIRTIO_DEV_STOPPED -1
+
+
+/* Enum for virtqueue management. */
+enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
+
+#define BUF_VECTOR_MAX 256
+
+/**
+ * Structure contains buffer address, length and descriptor index
+ * from vring to do scatter RX.
+ */
+struct buf_vector {
+	uint64_t buf_addr;
+	uint32_t buf_len;
+	uint32_t desc_idx;
+};
+
+/**
+ * Structure contains variables relevant to RX/TX virtqueues.
+ */
+struct vhost_virtqueue {
+	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
+	struct vring_avail	*avail;			/**< Virtqueue available ring. */
+	struct vring_used	*used;			/**< Virtqueue used ring. */
+	uint32_t		size;			/**< Size of descriptor ring. */
+	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
+	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
+	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
+	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
+#define VIRTIO_INVALID_EVENTFD		(-1)
+#define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
+	int			callfd;			/**< Used to notify the guest (trigger interrupt). */
+	int			kickfd;			/**< Currently unused as polling mode is enabled. */
+	int			enabled;
+	uint64_t		log_guest_addr;		/**< Physical address of used ring, for logging */
+	uint64_t		reserved[15];		/**< Reserve some spaces for future extension. */
+	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX. */
+} __rte_cache_aligned;
+
+/* Old kernels have no such macro defined */
+#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
+ #define VIRTIO_NET_F_GUEST_ANNOUNCE 21
+#endif
+
+
+/*
+ * Make an extra wrapper for VIRTIO_NET_F_MQ and
+ * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are
+ * introduced since kernel v3.8. This makes our
+ * code buildable for older kernel.
+ */
+#ifdef VIRTIO_NET_F_MQ
+ #define VHOST_MAX_QUEUE_PAIRS	VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
+ #define VHOST_SUPPORTS_MQ	(1ULL << VIRTIO_NET_F_MQ)
+#else
+ #define VHOST_MAX_QUEUE_PAIRS	1
+ #define VHOST_SUPPORTS_MQ	0
+#endif
+
+/*
+ * Define virtio 1.0 for older kernels
+ */
+#ifndef VIRTIO_F_VERSION_1
+ #define VIRTIO_F_VERSION_1 32
+#endif
+
+/**
+ * Device structure contains all configuration information relating to the device.
+ */
+struct virtio_net {
+	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
+	uint64_t		features;	/**< Negotiated feature set. */
+	uint64_t		protocol_features;	/**< Negotiated protocol feature set. */
+	uint64_t		device_fh;	/**< device identifier. */
+	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
+#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
+	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
+	uint32_t		virt_qp_nb;	/**< number of queue pair we have allocated */
+	void			*priv;		/**< private context */
+	uint64_t		log_size;	/**< Size of log area */
+	uint64_t		log_base;	/**< Where dirty pages are logged */
+	struct ether_addr	mac;		/**< MAC address */
+	rte_atomic16_t		broadcast_rarp;	/**< A flag to tell if we need broadcast rarp packet */
+	uint64_t		reserved[61];	/**< Reserve some spaces for future extension. */
+	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];	/**< Contains all virtqueue information. */
+} __rte_cache_aligned;
+
+/**
+ * Information relating to memory regions including offsets to addresses in QEMUs memory file.
+ */
+struct virtio_memory_regions {
+	uint64_t	guest_phys_address;	/**< Base guest physical address of region. */
+	uint64_t	guest_phys_address_end;	/**< End guest physical address of region. */
+	uint64_t	memory_size;		/**< Size of region. */
+	uint64_t	userspace_address;	/**< Base userspace address of region. */
+	uint64_t	address_offset;		/**< Offset of region for address translation. */
+};
+
+
+/**
+ * Memory structure includes region and mapping information.
+ */
+struct virtio_memory {
+	uint64_t	base_address;	/**< Base QEMU userspace address of the memory file. */
+	uint64_t	mapped_address;	/**< Mapped address of memory file base in our applications memory space. */
+	uint64_t	mapped_size;	/**< Total size of memory file. */
+	uint32_t	nregions;	/**< Number of memory regions. */
+	struct virtio_memory_regions      regions[0]; /**< Memory region information. */
+};
+
+/**
+ * Device and vring operations.
+ *
+ * Make sure to set VIRTIO_DEV_RUNNING to the device flags in new_device and
+ * remove it in destroy_device.
+ *
+ */
+struct virtio_net_device_ops {
+	int (*new_device)(struct virtio_net *);	/**< Add device. */
+	void (*destroy_device)(volatile struct virtio_net *);	/**< Remove device. */
+
+	int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable);	/**< triggered when a vring is enabled or disabled */
+};
+
+static inline uint16_t __attribute__((always_inline))
+rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
+
+	if (!vq->enabled)
+		return 0;
+
+	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
+}
+
+/**
+ * Function to convert guest physical addresses to vhost virtual addresses.
+ * This is used to convert guest virtio buffer addresses.
+ */
+static inline uint64_t __attribute__((always_inline))
+gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+{
+	struct virtio_memory_regions *region;
+	uint32_t regionidx;
+	uint64_t vhost_va = 0;
+
+	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+		region = &dev->mem->regions[regionidx];
+		if ((guest_pa >= region->guest_phys_address) &&
+			(guest_pa <= region->guest_phys_address_end)) {
+			vhost_va = region->address_offset + guest_pa;
+			break;
+		}
+	}
+	return vhost_va;
+}
+
+
+/**
+ *  Disable features in feature_mask. Returns 0 on success.
+ */
+int rte_vhost_feature_disable(uint64_t feature_mask);
+
+/**
+ *  Enable features in feature_mask. Returns 0 on success.
+ */
+int rte_vhost_feature_enable(uint64_t feature_mask);
+
+/* Returns currently supported vhost features */
+uint64_t rte_vhost_feature_get(void);
+
+int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable);
+
+/* Register vhost driver. dev_name could be different for multiple instance support. */
+int rte_vhost_driver_register(const char *dev_name);
+
+/* Unregister vhost driver. This is only meaningful to vhost user. */
+int rte_vhost_driver_unregister(const char *dev_name);
+
+/* Register callbacks. */
+int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
+/* Start vhost driver session blocking loop. */
+int rte_vhost_driver_session_start(void);
+
+/**
+ * This function adds buffers to the virtio devices RX virtqueue. Buffers can
+ * be received from the physical port or from another virtual device. A packet
+ * count is returned to indicate the number of packets that were succesfully
+ * added to the RX queue.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @param pkts
+ *  array to contain packets to be enqueued
+ * @param count
+ *  packets num to be enqueued
+ * @return
+ *  num of packets enqueued
+ */
+uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint16_t count);
+
+/**
+ * This function gets guest buffers from the virtio device TX virtqueue,
+ * construct host mbufs, copies guest buffer content to host mbufs and
+ * store them in pkts to be processed.
+ * @param dev
+ *  virtio-net device
+ * @param queue_id
+ *  virtio queue index in mq case
+ * @param mbuf_pool
+ *  mbuf_pool where host mbuf is allocated.
+ * @param pkts
+ *  array to contain packets to be dequeued
+ * @param count
+ *  packets num to be dequeued
+ * @return
+ *  num of packets dequeued
+ */
+uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
+
+#endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
new file mode 100644
index 00000000..f193a1f6
--- /dev/null
+++ b/lib/librte_vhost/vhost-net.h
@@ -0,0 +1,114 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_CDEV_H_
+#define _VHOST_NET_CDEV_H_
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/vhost.h>
+
+#include <rte_log.h>
+
+#include "rte_virtio_net.h"
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
+
+#ifdef RTE_LIBRTE_VHOST_DEBUG
+#define VHOST_MAX_PRINT_BUFF 6072
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
+#define PRINT_PACKET(device, addr, size, header) do { \
+	char *pkt_addr = (char *)(addr); \
+	unsigned int index; \
+	char packet[VHOST_MAX_PRINT_BUFF]; \
+	\
+	if ((header)) \
+		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Header size %d: ", (device->device_fh), (size)); \
+	else \
+		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Packet size %d: ", (device->device_fh), (size)); \
+	for (index = 0; index < (size); index++) { \
+		snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
+			"%02hhx ", pkt_addr[index]); \
+	} \
+	snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
+	\
+	LOG_DEBUG(VHOST_DATA, "%s", packet); \
+} while (0)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#define PRINT_PACKET(device, addr, size, header) do {} while (0)
+#endif
+
+
+/*
+ * Structure used to identify device context.
+ */
+struct vhost_device_ctx {
+	pid_t		pid;	/* PID of process calling the IOCTL. */
+	uint64_t	fh;	/* Populated with fi->fh to track the device index. */
+};
+
+int vhost_new_device(struct vhost_device_ctx);
+void vhost_destroy_device(struct vhost_device_ctx);
+
+void vhost_set_ifname(struct vhost_device_ctx,
+	const char *if_name, unsigned int if_len);
+
+int vhost_get_features(struct vhost_device_ctx, uint64_t *);
+int vhost_set_features(struct vhost_device_ctx, uint64_t *);
+
+int vhost_set_vring_num(struct vhost_device_ctx, struct vhost_vring_state *);
+int vhost_set_vring_addr(struct vhost_device_ctx, struct vhost_vring_addr *);
+int vhost_set_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+int vhost_get_vring_base(struct vhost_device_ctx,
+	uint32_t, struct vhost_vring_state *);
+
+int vhost_set_vring_kick(struct vhost_device_ctx, struct vhost_vring_file *);
+int vhost_set_vring_call(struct vhost_device_ctx, struct vhost_vring_file *);
+
+int vhost_set_backend(struct vhost_device_ctx, struct vhost_vring_file *);
+
+int vhost_set_owner(struct vhost_device_ctx);
+int vhost_reset_owner(struct vhost_device_ctx);
+
+/*
+ * Backend-specific cleanup. Defined by vhost-cuse and vhost-user.
+ */
+void vhost_backend_cleanup(struct virtio_net *dev);
+
+#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_vhost/vhost_cuse/eventfd_copy.c
new file mode 100644
index 00000000..154b32a4
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.c
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <rte_log.h>
+
+#include "eventfd_link/eventfd_link.h"
+#include "eventfd_copy.h"
+#include "vhost-net.h"
+
+static const char eventfd_cdev[] = "/dev/eventfd-link";
+
+static int eventfd_link = -1;
+
+int
+eventfd_init(void)
+{
+	if (eventfd_link >= 0)
+		return 0;
+
+	eventfd_link = open(eventfd_cdev, O_RDWR);
+	if (eventfd_link < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"eventfd_link module is not loaded\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+eventfd_free(void)
+{
+	if (eventfd_link >= 0)
+		close(eventfd_link);
+	return 0;
+}
+
+/*
+ * This function uses the eventfd_link kernel module to copy an eventfd file
+ * descriptor provided by QEMU in to our process space.
+ */
+int
+eventfd_copy(int target_fd, int target_pid)
+{
+	int ret;
+	struct eventfd_copy2 eventfd_copy2;
+
+
+	/* Open the character device to the kernel module. */
+	/* TODO: check this earlier rather than fail until VM boots! */
+	if (eventfd_init() < 0)
+		return -1;
+
+	eventfd_copy2.fd = target_fd;
+	eventfd_copy2.pid = target_pid;
+	eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC;
+	/* Call the IOCTL to copy the eventfd. */
+	ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"EVENTFD_COPY2 ioctl failed\n");
+		return -1;
+	}
+
+	return ret;
+}
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_vhost/vhost_cuse/eventfd_copy.h
new file mode 100644
index 00000000..5f446ca0
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.h
@@ -0,0 +1,45 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVENTFD_H
+#define _EVENTFD_H
+
+int
+eventfd_init(void);
+
+int
+eventfd_free(void);
+
+int
+eventfd_copy(int target_fd, int target_pid);
+
+#endif
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
new file mode 100644
index 00000000..c613e68e
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -0,0 +1,426 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <fuse/cuse_lowlevel.h>
+#include <linux/limits.h>
+#include <linux/vhost.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_virtio_net.h>
+
+#include "virtio-net-cdev.h"
+#include "vhost-net.h"
+#include "eventfd_copy.h"
+
+#define FUSE_OPT_DUMMY "\0\0"
+#define FUSE_OPT_FORE  "-f\0\0"
+#define FUSE_OPT_NOMULTI "-s\0\0"
+
+static const uint32_t default_major = 231;
+static const uint32_t default_minor = 1;
+static const char cuse_device_name[] = "/dev/cuse";
+static const char default_cdev[] = "vhost-net";
+
+static struct fuse_session *session;
+
+/*
+ * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
+ * when the device is added to the device linked list.
+ */
+static struct vhost_device_ctx
+fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
+{
+	struct vhost_device_ctx ctx;
+	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
+
+	ctx.pid = req_ctx->pid;
+	ctx.fh = fi->fh;
+
+	return ctx;
+}
+
+/*
+ * When the device is created in QEMU it gets initialised here and
+ * added to the device linked list.
+ */
+static void
+vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
+{
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	int err = 0;
+
+	err = vhost_new_device(ctx);
+	if (err == -1) {
+		fuse_reply_err(req, EPERM);
+		return;
+	}
+
+	fi->fh = err;
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"(%"PRIu64") Device configuration started\n", fi->fh);
+	fuse_reply_open(req, fi);
+}
+
+/*
+ * When QEMU is shutdown or killed the device gets released.
+ */
+static void
+vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
+{
+	int err = 0;
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+
+	vhost_destroy_device(ctx);
+	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
+	fuse_reply_err(req, err);
+}
+
+/*
+ * Boilerplate code for CUSE IOCTL
+ * Implicit arguments: ctx, req, result.
+ */
+#define VHOST_IOCTL(func) do {	\
+	result = (func)(ctx);	\
+	fuse_reply_ioctl(req, result, NULL, 0);	\
+} while (0)
+
+/*
+ * Boilerplate IOCTL RETRY
+ * Implicit arguments: req.
+ */
+#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
+	struct iovec iov_r = { arg, (size_r) };	\
+	struct iovec iov_w = { arg, (size_w) };	\
+	fuse_reply_ioctl_retry(req, &iov_r,	\
+		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_R(type, var, func) do {	\
+	if (!in_bufsz) {	\
+		VHOST_IOCTL_RETRY(sizeof(type), 0);\
+	} else {	\
+		(var) = *(const type*)in_buf;	\
+		result = func(ctx, &(var));	\
+		fuse_reply_ioctl(req, result, NULL, 0);\
+	}	\
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Write IOCTL
+ * Implicit arguments: ctx, req, result, out_bufsz.
+ */
+#define VHOST_IOCTL_W(type, var, func) do {	\
+	if (!out_bufsz) {	\
+		VHOST_IOCTL_RETRY(0, sizeof(type));\
+	} else {	\
+		result = (func)(ctx, &(var));\
+		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
+	} \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read/Write IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
+	if (!in_bufsz) {	\
+		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
+	} else {	\
+		(var1) = *(const type1*) (in_buf);	\
+		result = (func)(ctx, (var1), &(var2));	\
+		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
+	}	\
+} while (0)
+
+/*
+ * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
+ * of IOCTL a buffer is requested to read or to write. This request is handled
+ * by FUSE and the buffer is then given to CUSE.
+ */
+static void
+vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
+		struct fuse_file_info *fi, __rte_unused unsigned flags,
+		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	struct vhost_vring_file file;
+	struct vhost_vring_state state;
+	struct vhost_vring_addr addr;
+	uint64_t features;
+	uint32_t index;
+	int result = 0;
+
+	switch (cmd) {
+	case VHOST_NET_SET_BACKEND:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
+		if (!in_buf) {
+			VHOST_IOCTL_RETRY(sizeof(file), 0);
+			break;
+		}
+		file = *(const struct vhost_vring_file *)in_buf;
+		result = cuse_set_backend(ctx, &file);
+		fuse_reply_ioctl(req, result, NULL, 0);
+		break;
+
+	case VHOST_GET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+		VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
+		break;
+
+	case VHOST_SET_FEATURES:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+		VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
+		break;
+
+	case VHOST_RESET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+		VHOST_IOCTL(vhost_reset_owner);
+		break;
+
+	case VHOST_SET_OWNER:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+		VHOST_IOCTL(vhost_set_owner);
+		break;
+
+	case VHOST_SET_MEM_TABLE:
+		/*TODO fix race condition.*/
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+		static struct vhost_memory mem_temp;
+
+		switch (in_bufsz) {
+		case 0:
+			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
+			break;
+
+		case sizeof(struct vhost_memory):
+			mem_temp = *(const struct vhost_memory *) in_buf;
+
+			if (mem_temp.nregions > 0) {
+				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
+					(sizeof(struct vhost_memory_region) *
+						mem_temp.nregions), 0);
+			} else {
+				result = -1;
+				fuse_reply_ioctl(req, result, NULL, 0);
+			}
+			break;
+
+		default:
+			result = cuse_set_mem_table(ctx, in_buf,
+				mem_temp.nregions);
+			if (result)
+				fuse_reply_err(req, EINVAL);
+			else
+				fuse_reply_ioctl(req, result, NULL, 0);
+		}
+		break;
+
+	case VHOST_SET_VRING_NUM:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_state, state,
+			vhost_set_vring_num);
+		break;
+
+	case VHOST_SET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_state, state,
+			vhost_set_vring_base);
+		break;
+
+	case VHOST_GET_VRING_BASE:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+		VHOST_IOCTL_RW(uint32_t, index,
+			struct vhost_vring_state, state, vhost_get_vring_base);
+		break;
+
+	case VHOST_SET_VRING_ADDR:
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
+			vhost_set_vring_addr);
+		break;
+
+	case VHOST_SET_VRING_KICK:
+	case VHOST_SET_VRING_CALL:
+		if (cmd == VHOST_SET_VRING_KICK)
+			LOG_DEBUG(VHOST_CONFIG,
+				"(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n",
+			ctx.fh);
+		else
+			LOG_DEBUG(VHOST_CONFIG,
+				"(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n",
+			ctx.fh);
+		if (!in_buf)
+			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
+		else {
+			int fd;
+			file = *(const struct vhost_vring_file *)in_buf;
+			LOG_DEBUG(VHOST_CONFIG,
+				"idx:%d fd:%d\n", file.index, file.fd);
+			fd = eventfd_copy(file.fd, ctx.pid);
+			if (fd < 0) {
+				fuse_reply_ioctl(req, -1, NULL, 0);
+				result = -1;
+				break;
+			}
+			file.fd = fd;
+			if (cmd == VHOST_SET_VRING_KICK) {
+				result = vhost_set_vring_kick(ctx, &file);
+				fuse_reply_ioctl(req, result, NULL, 0);
+			} else {
+				result = vhost_set_vring_call(ctx, &file);
+				fuse_reply_ioctl(req, result, NULL, 0);
+			}
+		}
+		break;
+
+	default:
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
+		result = -1;
+		fuse_reply_ioctl(req, result, NULL, 0);
+	}
+
+	if (result < 0)
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
+	else
+		LOG_DEBUG(VHOST_CONFIG,
+			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
+}
+
+/*
+ * Structure handling open, release and ioctl function pointers is populated.
+ */
+static const struct cuse_lowlevel_ops vhost_net_ops = {
+	.open		= vhost_net_open,
+	.release	= vhost_net_release,
+	.ioctl		= vhost_net_ioctl,
+};
+
+/*
+ * cuse_info is populated and used to register the cuse device.
+ * vhost_net_device_ops are also passed when the device is registered in app.
+ */
+int
+rte_vhost_driver_register(const char *dev_name)
+{
+	struct cuse_info cuse_info;
+	char device_name[PATH_MAX] = "";
+	char char_device_name[PATH_MAX] = "";
+	const char *device_argv[] = { device_name };
+
+	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
+	char fuse_opt_fore[] = FUSE_OPT_FORE;
+	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
+	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+
+	if (access(cuse_device_name, R_OK | W_OK) < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"char device %s can't be accessed, maybe not exist\n",
+			cuse_device_name);
+		return -1;
+	}
+
+	if (eventfd_init() < 0)
+		return -1;
+
+	/*
+	 * The device name is created. This is passed to QEMU so that it can
+	 * register the device with our application.
+	 */
+	snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
+	snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
+
+	/* Check if device already exists. */
+	if (access(char_device_name, F_OK) != -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"char device %s already exists\n", char_device_name);
+		return -1;
+	}
+
+	memset(&cuse_info, 0, sizeof(cuse_info));
+	cuse_info.dev_major = default_major;
+	cuse_info.dev_minor = default_minor;
+	cuse_info.dev_info_argc = 1;
+	cuse_info.dev_info_argv = device_argv;
+	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
+
+	session = cuse_lowlevel_setup(3, fuse_argv,
+			&cuse_info, &vhost_net_ops, 0, NULL);
+	if (session == NULL)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * An empty function for unregister
+ */
+int
+rte_vhost_driver_unregister(const char *dev_name __rte_unused)
+{
+	return 0;
+}
+
+/**
+ * The CUSE session is launched allowing the application to receive open,
+ * release and ioctl calls.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+	fuse_session_loop(session);
+
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
new file mode 100644
index 00000000..a68a8bd4
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -0,0 +1,435 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <dirent.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <fuse/cuse_lowlevel.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <rte_log.h>
+
+#include "rte_virtio_net.h"
+#include "vhost-net.h"
+#include "virtio-net-cdev.h"
+#include "virtio-net.h"
+#include "eventfd_copy.h"
+
+/* Line size for reading maps file. */
+static const uint32_t BUFSIZE = PATH_MAX;
+
+/* Size of prot char array in procmap. */
+#define PROT_SZ 5
+
+/* Number of elements in procmap struct. */
+#define PROCMAP_SZ 8
+
+/* Structure containing information gathered from maps file. */
+struct procmap {
+	uint64_t va_start;	/* Start virtual address in file. */
+	uint64_t len;		/* Size of file. */
+	uint64_t pgoff;		/* Not used. */
+	uint32_t maj;		/* Not used. */
+	uint32_t min;		/* Not used. */
+	uint32_t ino;		/* Not used. */
+	char prot[PROT_SZ];	/* Not used. */
+	char fname[PATH_MAX];	/* File name. */
+};
+
+/*
+ * Locate the file containing QEMU's memory space and
+ * map it to our address space.
+ */
+static int
+host_memory_map(pid_t pid, uint64_t addr,
+	uint64_t *mapped_address, uint64_t *mapped_size)
+{
+	struct dirent *dptr = NULL;
+	struct procmap procmap;
+	DIR *dp = NULL;
+	int fd;
+	int i;
+	char memfile[PATH_MAX];
+	char mapfile[PATH_MAX];
+	char procdir[PATH_MAX];
+	char resolved_path[PATH_MAX];
+	char *path = NULL;
+	FILE *fmap;
+	void *map;
+	uint8_t found = 0;
+	char line[BUFSIZE];
+	char dlm[] = "-   :   ";
+	char *str, *sp, *in[PROCMAP_SZ];
+	char *end = NULL;
+
+	/* Path where mem files are located. */
+	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
+	/* Maps file used to locate mem file. */
+	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
+
+	fmap = fopen(mapfile, "r");
+	if (fmap == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to open maps file for pid %d\n",
+			pid);
+		return -1;
+	}
+
+	/* Read through maps file until we find out base_address. */
+	while (fgets(line, BUFSIZE, fmap) != 0) {
+		str = line;
+		errno = 0;
+		/* Split line into fields. */
+		for (i = 0; i < PROCMAP_SZ; i++) {
+			in[i] = strtok_r(str, &dlm[i], &sp);
+			if ((in[i] == NULL) || (errno != 0)) {
+				fclose(fmap);
+				return -1;
+			}
+			str = NULL;
+		}
+
+		/* Convert/Copy each field as needed. */
+		procmap.va_start = strtoull(in[0], &end, 16);
+		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.len = strtoull(in[1], &end, 16);
+		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.pgoff = strtoull(in[3], &end, 16);
+		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.maj = strtoul(in[4], &end, 16);
+		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.min = strtoul(in[5], &end, 16);
+		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		procmap.ino = strtoul(in[6], &end, 16);
+		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
+			(errno != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+
+		memcpy(&procmap.prot, in[2], PROT_SZ);
+		memcpy(&procmap.fname, in[7], PATH_MAX);
+
+		if (procmap.va_start == addr) {
+			procmap.len = procmap.len - procmap.va_start;
+			found = 1;
+			break;
+		}
+	}
+	fclose(fmap);
+
+	if (!found) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find memory file in pid %d maps file\n",
+			pid);
+		return -1;
+	}
+
+	/* Find the guest memory file among the process fds. */
+	dp = opendir(procdir);
+	if (dp == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Cannot open pid %d process directory\n",
+			pid);
+		return -1;
+	}
+
+	found = 0;
+
+	/* Read the fd directory contents. */
+	while (NULL != (dptr = readdir(dp))) {
+		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
+				pid, dptr->d_name);
+		path = realpath(memfile, resolved_path);
+		if ((path == NULL) && (strlen(resolved_path) == 0)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to resolve fd directory\n");
+			closedir(dp);
+			return -1;
+		}
+		if (strncmp(resolved_path, procmap.fname,
+			strnlen(procmap.fname, PATH_MAX)) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	closedir(dp);
+
+	if (found == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find memory file for pid %d\n",
+			pid);
+		return -1;
+	}
+	/* Open the shared memory file and map the memory into this process. */
+	fd = open(memfile, O_RDWR);
+
+	if (fd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to open %s for pid %d\n",
+			memfile, pid);
+		return -1;
+	}
+
+	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
+			MAP_POPULATE|MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (map == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Error mapping the file %s for pid %d\n",
+			memfile, pid);
+		return -1;
+	}
+
+	/* Store the memory address and size in the device data structure */
+	*mapped_address = (uint64_t)(uintptr_t)map;
+	*mapped_size = procmap.len;
+
+	LOG_DEBUG(VHOST_CONFIG,
+		"Mem File: %s->%s - Size: %llu - VA: %p\n",
+		memfile, resolved_path,
+		(unsigned long long)*mapped_size, map);
+
+	return 0;
+}
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx,
+	const struct vhost_memory *mem_regions_addr, uint32_t nregions)
+{
+	uint64_t size = offsetof(struct vhost_memory, regions);
+	uint32_t idx, valid_regions;
+	struct virtio_memory_regions *pregion;
+	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
+		((uint64_t)(uintptr_t)mem_regions_addr + size);
+	uint64_t base_address = 0, mapped_address, mapped_size;
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	if (dev->mem && dev->mem->mapped_address) {
+		munmap((void *)(uintptr_t)dev->mem->mapped_address,
+			(size_t)dev->mem->mapped_size);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	dev->mem = calloc(1, sizeof(struct virtio_memory) +
+		sizeof(struct virtio_memory_regions) * nregions);
+	if (dev->mem == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
+			dev->device_fh);
+		return -1;
+	}
+
+	pregion = &dev->mem->regions[0];
+
+	for (idx = 0; idx < nregions; idx++) {
+		pregion[idx].guest_phys_address =
+			mem_regions[idx].guest_phys_addr;
+		pregion[idx].guest_phys_address_end =
+			pregion[idx].guest_phys_address +
+			mem_regions[idx].memory_size;
+		pregion[idx].memory_size =
+			mem_regions[idx].memory_size;
+		pregion[idx].userspace_address =
+			mem_regions[idx].userspace_addr;
+
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)pregion[idx].guest_phys_address,
+			(void *)(uintptr_t)pregion[idx].userspace_address,
+			pregion[idx].memory_size);
+
+		/*set the base address mapping*/
+		if (pregion[idx].guest_phys_address == 0x0) {
+			base_address =
+				pregion[idx].userspace_address;
+			/* Map VM memory file */
+			if (host_memory_map(ctx.pid, base_address,
+				&mapped_address, &mapped_size) != 0) {
+				free(dev->mem);
+				dev->mem = NULL;
+				return -1;
+			}
+			dev->mem->mapped_address = mapped_address;
+			dev->mem->base_address = base_address;
+			dev->mem->mapped_size = mapped_size;
+		}
+	}
+
+	/* Check that we have a valid base address. */
+	if (base_address == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find base address of qemu memory file.\n");
+		free(dev->mem);
+		dev->mem = NULL;
+		return -1;
+	}
+
+	valid_regions = nregions;
+	for (idx = 0; idx < nregions; idx++) {
+		if ((pregion[idx].userspace_address < base_address) ||
+			(pregion[idx].userspace_address >
+			(base_address + mapped_size)))
+			valid_regions--;
+	}
+
+
+	if (valid_regions != nregions) {
+		valid_regions = 0;
+		for (idx = nregions; 0 != idx--; ) {
+			if ((pregion[idx].userspace_address < base_address) ||
+			(pregion[idx].userspace_address >
+			(base_address + mapped_size))) {
+				memmove(&pregion[idx], &pregion[idx + 1],
+					sizeof(struct virtio_memory_regions) *
+					valid_regions);
+			} else
+				valid_regions++;
+		}
+	}
+
+	for (idx = 0; idx < valid_regions; idx++) {
+		pregion[idx].address_offset =
+			mapped_address - base_address +
+			pregion[idx].userspace_address -
+			pregion[idx].guest_phys_address;
+	}
+	dev->mem->nregions = valid_regions;
+
+	return 0;
+}
+
+/*
+ * Function to get the tap device name from the provided file descriptor and
+ * save it in the device structure.
+ */
+static int
+get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int pid)
+{
+	int fd_tap;
+	struct ifreq ifr;
+	uint32_t ifr_size;
+	int ret;
+
+	fd_tap = eventfd_copy(tap_fd, pid);
+	if (fd_tap < 0)
+		return -1;
+
+	ret = ioctl(fd_tap, TUNGETIFF, &ifr);
+
+	if (close(fd_tap) < 0)
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") fd close failed\n",
+			dev->device_fh);
+
+	if (ret >= 0) {
+		ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
+		vhost_set_ifname(ctx, ifr.ifr_name, ifr_size);
+	} else
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") TUNGETIFF ioctl failed\n",
+			dev->device_fh);
+
+	return 0;
+}
+
+int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED)
+		get_ifname(ctx, dev, file->fd, ctx.pid);
+
+	return vhost_set_backend(ctx, file);
+}
+
+void
+vhost_backend_cleanup(struct virtio_net *dev)
+{
+	/* Unmap QEMU memory file if mapped. */
+	if (dev->mem) {
+		munmap((void *)(uintptr_t)dev->mem->mapped_address,
+			(size_t)dev->mem->mapped_size);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
+}
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
new file mode 100644
index 00000000..eb6b0bab
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
@@ -0,0 +1,48 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VIRTIO_NET_CDEV_H
+#define _VIRTIO_NET_CDEV_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "vhost-net.h"
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx,
+	const struct vhost_memory *mem_regions_addr, uint32_t nregions);
+
+int
+cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *);
+
+#endif
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
new file mode 100644
index 00000000..750821a4
--- /dev/null
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -0,0 +1,947 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/virtio_net.h>
+
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_virtio_net.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
+#include <rte_arp.h>
+
+#include "vhost-net.h"
+
+#define MAX_PKT_BURST 32
+#define VHOST_LOG_PAGE	4096
+
+static inline void __attribute__((always_inline))
+vhost_log_page(uint8_t *log_base, uint64_t page)
+{
+	log_base[page / 8] |= 1 << (page % 8);
+}
+
+static inline void __attribute__((always_inline))
+vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	/* To make sure guest memory updates are committed before logging */
+	rte_smp_wmb();
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+		page += 1;
+	}
+}
+
+static inline void __attribute__((always_inline))
+vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		     uint64_t offset, uint64_t len)
+{
+	vhost_log_write(dev, vq->log_guest_addr + offset, len);
+}
+
+static bool
+is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
+{
+	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
+}
+
+static void
+virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
+{
+	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
+		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
+
+		switch (m_buf->ol_flags & PKT_TX_L4_MASK) {
+		case PKT_TX_TCP_CKSUM:
+			net_hdr->csum_offset = (offsetof(struct tcp_hdr,
+						cksum));
+			break;
+		case PKT_TX_UDP_CKSUM:
+			net_hdr->csum_offset = (offsetof(struct udp_hdr,
+						dgram_cksum));
+			break;
+		case PKT_TX_SCTP_CKSUM:
+			net_hdr->csum_offset = (offsetof(struct sctp_hdr,
+						cksum));
+			break;
+		}
+	}
+
+	if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
+		if (m_buf->ol_flags & PKT_TX_IPV4)
+			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+		else
+			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+		net_hdr->gso_size = m_buf->tso_segsz;
+		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
+					+ m_buf->l4_len;
+	}
+}
+
+static inline void
+copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr,
+		    struct virtio_net_hdr_mrg_rxbuf hdr)
+{
+	if (vq->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
+		*(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
+	else
+		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
+}
+
+static inline int __attribute__((always_inline))
+copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		  struct rte_mbuf *m, uint16_t desc_idx, uint32_t *copied)
+{
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct vring_desc *desc;
+	uint64_t desc_addr;
+	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+
+	desc = &vq->desc[desc_idx];
+	if (unlikely(desc->len < vq->vhost_hlen))
+		return -1;
+
+	desc_addr = gpa_to_vva(dev, desc->addr);
+	rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+	virtio_enqueue_offload(m, &virtio_hdr.hdr);
+	copy_virtio_net_hdr(vq, desc_addr, virtio_hdr);
+	vhost_log_write(dev, desc->addr, vq->vhost_hlen);
+	PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0);
+
+	desc_offset = vq->vhost_hlen;
+	desc_avail  = desc->len - vq->vhost_hlen;
+
+	*copied = rte_pktmbuf_pkt_len(m);
+	mbuf_avail  = rte_pktmbuf_data_len(m);
+	mbuf_offset = 0;
+	while (mbuf_avail != 0 || m->next != NULL) {
+		/* done with current mbuf, fetch next */
+		if (mbuf_avail == 0) {
+			m = m->next;
+
+			mbuf_offset = 0;
+			mbuf_avail  = rte_pktmbuf_data_len(m);
+		}
+
+		/* done with current desc buf, fetch next */
+		if (desc_avail == 0) {
+			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
+				/* Room in vring buffer is not enough */
+				return -1;
+			}
+			if (unlikely(desc->next >= vq->size))
+				return -1;
+
+			desc = &vq->desc[desc->next];
+			desc_addr   = gpa_to_vva(dev, desc->addr);
+			desc_offset = 0;
+			desc_avail  = desc->len;
+		}
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
+			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+			cpy_len);
+		vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+			     cpy_len, 0);
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+	}
+
+	return 0;
+}
+
+/*
+ * As many data cores may want to access available buffers
+ * they need to be reserved.
+ */
+static inline uint32_t
+reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count,
+		  uint16_t *start, uint16_t *end)
+{
+	uint16_t res_start_idx;
+	uint16_t res_end_idx;
+	uint16_t avail_idx;
+	uint16_t free_entries;
+	int success;
+
+	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
+
+again:
+	res_start_idx = vq->last_used_idx_res;
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+
+	free_entries = avail_idx - res_start_idx;
+	count = RTE_MIN(count, free_entries);
+	if (count == 0)
+		return 0;
+
+	res_end_idx = res_start_idx + count;
+
+	/*
+	 * update vq->last_used_idx_res atomically; try again if failed.
+	 *
+	 * TODO: Allow to disable cmpset if no concurrency in application.
+	 */
+	success = rte_atomic16_cmpset(&vq->last_used_idx_res,
+				      res_start_idx, res_end_idx);
+	if (unlikely(!success))
+		goto again;
+
+	*start = res_start_idx;
+	*end   = res_end_idx;
+
+	return count;
+}
+
+/**
+ * This function adds buffers to the virtio devices RX virtqueue. Buffers can
+ * be received from the physical port or from another virtio device. A packet
+ * count is returned to indicate the number of packets that are succesfully
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
+ */
+static inline uint32_t __attribute__((always_inline))
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+	      struct rte_mbuf **pkts, uint32_t count)
+{
+	struct vhost_virtqueue *vq;
+	uint16_t res_start_idx, res_end_idx;
+	uint16_t desc_indexes[MAX_PKT_BURST];
+	uint32_t i;
+
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			__func__, dev->device_fh, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
+	count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx);
+	if (count == 0)
+		return 0;
+
+	LOG_DEBUG(VHOST_DATA,
+		"(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n",
+		dev->device_fh, res_start_idx, res_end_idx);
+
+	/* Retrieve all of the desc indexes first to avoid caching issues. */
+	rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]);
+	for (i = 0; i < count; i++) {
+		desc_indexes[i] = vq->avail->ring[(res_start_idx + i) &
+						  (vq->size - 1)];
+	}
+
+	rte_prefetch0(&vq->desc[desc_indexes[0]]);
+	for (i = 0; i < count; i++) {
+		uint16_t desc_idx = desc_indexes[i];
+		uint16_t used_idx = (res_start_idx + i) & (vq->size - 1);
+		uint32_t copied;
+		int err;
+
+		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx, &copied);
+
+		vq->used->ring[used_idx].id = desc_idx;
+		if (unlikely(err))
+			vq->used->ring[used_idx].len = vq->vhost_hlen;
+		else
+			vq->used->ring[used_idx].len = copied + vq->vhost_hlen;
+		vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[used_idx]),
+			sizeof(vq->used->ring[used_idx]));
+
+		if (i + 1 < count)
+			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+	}
+
+	rte_smp_wmb();
+
+	/* Wait until it's our turn to add our buffer to the used ring. */
+	while (unlikely(vq->last_used_idx != res_start_idx))
+		rte_pause();
+
+	*(volatile uint16_t *)&vq->used->idx += count;
+	vq->last_used_idx = res_end_idx;
+	vhost_log_used_vring(dev, vq,
+		offsetof(struct vring_used, idx),
+		sizeof(vq->used->idx));
+
+	/* flush used->idx update before we read avail->flags. */
+	rte_mb();
+
+	/* Kick the guest if necessary. */
+	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+			&& (vq->callfd >= 0))
+		eventfd_write(vq->callfd, (eventfd_t)1);
+	return count;
+}
+
+static inline int
+fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
+	     uint32_t *allocated, uint32_t *vec_idx)
+{
+	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
+	uint32_t vec_id = *vec_idx;
+	uint32_t len    = *allocated;
+
+	while (1) {
+		if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+			return -1;
+
+		len += vq->desc[idx].len;
+		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		vq->buf_vec[vec_id].buf_len  = vq->desc[idx].len;
+		vq->buf_vec[vec_id].desc_idx = idx;
+		vec_id++;
+
+		if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
+			break;
+
+		idx = vq->desc[idx].next;
+	}
+
+	*allocated = len;
+	*vec_idx   = vec_id;
+
+	return 0;
+}
+
+/*
+ * As many data cores may want to access available buffers concurrently,
+ * they need to be reserved.
+ *
+ * Returns -1 on fail, 0 on success
+ */
+static inline int
+reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
+			    uint16_t *start, uint16_t *end)
+{
+	uint16_t res_start_idx;
+	uint16_t res_cur_idx;
+	uint16_t avail_idx;
+	uint32_t allocated;
+	uint32_t vec_idx;
+	uint16_t tries;
+
+again:
+	res_start_idx = vq->last_used_idx_res;
+	res_cur_idx  = res_start_idx;
+
+	allocated = 0;
+	vec_idx   = 0;
+	tries     = 0;
+	while (1) {
+		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+		if (unlikely(res_cur_idx == avail_idx))
+			return -1;
+
+		if (unlikely(fill_vec_buf(vq, res_cur_idx, &allocated,
+					  &vec_idx) < 0))
+			return -1;
+
+		res_cur_idx++;
+		tries++;
+
+		if (allocated >= size)
+			break;
+
+		/*
+		 * if we tried all available ring items, and still
+		 * can't get enough buf, it means something abnormal
+		 * happened.
+		 */
+		if (unlikely(tries >= vq->size))
+			return -1;
+	}
+
+	/*
+	 * update vq->last_used_idx_res atomically.
+	 * retry again if failed.
+	 */
+	if (rte_atomic16_cmpset(&vq->last_used_idx_res,
+				res_start_idx, res_cur_idx) == 0)
+		goto again;
+
+	*start = res_start_idx;
+	*end   = res_cur_idx;
+	return 0;
+}
+
+static inline uint32_t __attribute__((always_inline))
+copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			    uint16_t res_start_idx, uint16_t res_end_idx,
+			    struct rte_mbuf *m)
+{
+	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+	uint32_t vec_idx = 0;
+	uint16_t cur_idx = res_start_idx;
+	uint64_t desc_addr;
+	uint32_t mbuf_offset, mbuf_avail;
+	uint32_t desc_offset, desc_avail;
+	uint32_t cpy_len;
+	uint16_t desc_idx, used_idx;
+
+	if (unlikely(m == NULL))
+		return 0;
+
+	LOG_DEBUG(VHOST_DATA,
+		"(%"PRIu64") Current Index %d| End Index %d\n",
+		dev->device_fh, cur_idx, res_end_idx);
+
+	if (vq->buf_vec[vec_idx].buf_len < vq->vhost_hlen)
+		return -1;
+
+	desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+	virtio_hdr.num_buffers = res_end_idx - res_start_idx;
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
+		dev->device_fh, virtio_hdr.num_buffers);
+
+	virtio_enqueue_offload(m, &virtio_hdr.hdr);
+	copy_virtio_net_hdr(vq, desc_addr, virtio_hdr);
+	vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, vq->vhost_hlen);
+	PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0);
+
+	desc_avail  = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
+	desc_offset = vq->vhost_hlen;
+
+	mbuf_avail  = rte_pktmbuf_data_len(m);
+	mbuf_offset = 0;
+	while (mbuf_avail != 0 || m->next != NULL) {
+		/* done with current desc buf, get the next one */
+		if (desc_avail == 0) {
+			desc_idx = vq->buf_vec[vec_idx].desc_idx;
+
+			if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
+				/* Update used ring with desc information */
+				used_idx = cur_idx++ & (vq->size - 1);
+				vq->used->ring[used_idx].id  = desc_idx;
+				vq->used->ring[used_idx].len = desc_offset;
+				vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+						 ring[used_idx]),
+					sizeof(vq->used->ring[used_idx]));
+			}
+
+			vec_idx++;
+			desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+
+			/* Prefetch buffer address. */
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+			desc_offset = 0;
+			desc_avail  = vq->buf_vec[vec_idx].buf_len;
+		}
+
+		/* done with current mbuf, get the next one */
+		if (mbuf_avail == 0) {
+			m = m->next;
+
+			mbuf_offset = 0;
+			mbuf_avail  = rte_pktmbuf_data_len(m);
+		}
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
+			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+			cpy_len);
+		vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr + desc_offset,
+			cpy_len);
+		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+			cpy_len, 0);
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+	}
+
+	used_idx = cur_idx & (vq->size - 1);
+	vq->used->ring[used_idx].id = vq->buf_vec[vec_idx].desc_idx;
+	vq->used->ring[used_idx].len = desc_offset;
+	vhost_log_used_vring(dev, vq,
+		offsetof(struct vring_used, ring[used_idx]),
+		sizeof(vq->used->ring[used_idx]));
+
+	return res_end_idx - res_start_idx;
+}
+
+static inline uint32_t __attribute__((always_inline))
+virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint32_t count)
+{
+	struct vhost_virtqueue *vq;
+	uint32_t pkt_idx = 0, nr_used = 0;
+	uint16_t start, end;
+
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
+		dev->device_fh);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			__func__, dev->device_fh, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+	if (count == 0)
+		return 0;
+
+	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
+
+		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
+							 &start, &end) < 0)) {
+			LOG_DEBUG(VHOST_DATA,
+				"(%" PRIu64 ") Failed to get enough desc from vring\n",
+				dev->device_fh);
+			break;
+		}
+
+		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end,
+						      pkts[pkt_idx]);
+		rte_smp_wmb();
+
+		/*
+		 * Wait until it's our turn to add our buffer
+		 * to the used ring.
+		 */
+		while (unlikely(vq->last_used_idx != start))
+			rte_pause();
+
+		*(volatile uint16_t *)&vq->used->idx += nr_used;
+		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			sizeof(vq->used->idx));
+		vq->last_used_idx = end;
+	}
+
+	if (likely(pkt_idx)) {
+		/* flush used->idx update before we read avail->flags. */
+		rte_mb();
+
+		/* Kick the guest if necessary. */
+		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+				&& (vq->callfd >= 0))
+			eventfd_write(vq->callfd, (eventfd_t)1);
+	}
+
+	return pkt_idx;
+}
+
+uint16_t
+rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint16_t count)
+{
+	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
+	else
+		return virtio_dev_rx(dev, queue_id, pkts, count);
+}
+
+static void
+parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	void *l3_hdr = NULL;
+	struct ether_hdr *eth_hdr;
+	uint16_t ethertype;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	m->l2_len = sizeof(struct ether_hdr);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	if (ethertype == ETHER_TYPE_VLAN) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		m->l2_len += sizeof(struct vlan_hdr);
+		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+	}
+
+	l3_hdr = (char *)eth_hdr + m->l2_len;
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		*l4_proto = ipv4_hdr->next_proto_id;
+		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV4;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+		*l4_proto = ipv6_hdr->proto;
+		m->l3_len = sizeof(struct ipv6_hdr);
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV6;
+		break;
+	default:
+		m->l3_len = 0;
+		*l4_proto = 0;
+		break;
+	}
+}
+
+static inline void __attribute__((always_inline))
+vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
+{
+	uint16_t l4_proto = 0;
+	void *l4_hdr = NULL;
+	struct tcp_hdr *tcp_hdr = NULL;
+
+	parse_ethernet(m, &l4_proto, &l4_hdr);
+	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
+			switch (hdr->csum_offset) {
+			case (offsetof(struct tcp_hdr, cksum)):
+				if (l4_proto == IPPROTO_TCP)
+					m->ol_flags |= PKT_TX_TCP_CKSUM;
+				break;
+			case (offsetof(struct udp_hdr, dgram_cksum)):
+				if (l4_proto == IPPROTO_UDP)
+					m->ol_flags |= PKT_TX_UDP_CKSUM;
+				break;
+			case (offsetof(struct sctp_hdr, cksum)):
+				if (l4_proto == IPPROTO_SCTP)
+					m->ol_flags |= PKT_TX_SCTP_CKSUM;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			tcp_hdr = (struct tcp_hdr *)l4_hdr;
+			m->ol_flags |= PKT_TX_TCP_SEG;
+			m->tso_segsz = hdr->gso_size;
+			m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+			break;
+		default:
+			RTE_LOG(WARNING, VHOST_DATA,
+				"unsupported gso type %u.\n", hdr->gso_type);
+			break;
+		}
+	}
+}
+
+#define RARP_PKT_SIZE	64
+
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr  *rarp;
+
+	if (rarp_mbuf->buf_len < 64) {
+		RTE_LOG(WARNING, VHOST_DATA,
+			"failed to make RARP; mbuf size too small %u (< %d)\n",
+			rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op  = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->pkt_len  = rarp_mbuf->data_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static inline int __attribute__((always_inline))
+copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		  struct rte_mbuf *m, uint16_t desc_idx,
+		  struct rte_mempool *mbuf_pool)
+{
+	struct vring_desc *desc;
+	uint64_t desc_addr;
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct rte_mbuf *cur = m, *prev = m;
+	struct virtio_net_hdr *hdr;
+	/* A counter to avoid desc dead loop chain */
+	uint32_t nr_desc = 1;
+
+	desc = &vq->desc[desc_idx];
+	if (unlikely(desc->len < vq->vhost_hlen))
+		return -1;
+
+	desc_addr = gpa_to_vva(dev, desc->addr);
+	rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+	/* Retrieve virtio net header */
+	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+	desc_avail  = desc->len - vq->vhost_hlen;
+	desc_offset = vq->vhost_hlen;
+
+	mbuf_offset = 0;
+	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+	while (desc_avail != 0 || (desc->flags & VRING_DESC_F_NEXT) != 0) {
+		/* This desc reaches to its end, get the next one */
+		if (desc_avail == 0) {
+			if (unlikely(desc->next >= vq->size ||
+				     ++nr_desc >= vq->size))
+				return -1;
+			desc = &vq->desc[desc->next];
+
+			desc_addr = gpa_to_vva(dev, desc->addr);
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+			desc_offset = 0;
+			desc_avail  = desc->len;
+
+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+		}
+
+		/*
+		 * This mbuf reaches to its end, get a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(cur == NULL)) {
+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
+					"allocate memory for mbuf.\n");
+				return -1;
+			}
+
+			prev->next = cur;
+			prev->data_len = mbuf_offset;
+			m->nb_segs += 1;
+			m->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
+			(void *)((uintptr_t)(desc_addr + desc_offset)),
+			cpy_len);
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+	}
+
+	prev->data_len = mbuf_offset;
+	m->pkt_len    += mbuf_offset;
+
+	if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+		vhost_dequeue_offload(hdr, m);
+
+	return 0;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+	struct rte_mbuf *rarp_mbuf = NULL;
+	struct vhost_virtqueue *vq;
+	uint32_t desc_indexes[MAX_PKT_BURST];
+	uint32_t used_idx;
+	uint32_t i = 0;
+	uint16_t free_entries;
+	uint16_t avail_idx;
+
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA,
+			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
+			__func__, dev->device_fh, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
+	/*
+	 * Construct a RARP broadcast packet, and inject it to the "pkts"
+	 * array, to looks like that guest actually send such packet.
+	 *
+	 * Check user_send_rarp() for more information.
+	 */
+	if (unlikely(rte_atomic16_cmpset((volatile uint16_t *)
+					 &dev->broadcast_rarp.cnt, 1, 0))) {
+		rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
+		if (rarp_mbuf == NULL) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			return 0;
+		}
+
+		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
+			rte_pktmbuf_free(rarp_mbuf);
+			rarp_mbuf = NULL;
+		} else {
+			count -= 1;
+		}
+	}
+
+	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
+	free_entries = avail_idx - vq->last_used_idx;
+	if (free_entries == 0)
+		goto out;
+
+	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh);
+
+	/* Prefetch available ring to retrieve head indexes. */
+	used_idx = vq->last_used_idx & (vq->size - 1);
+	rte_prefetch0(&vq->avail->ring[used_idx]);
+
+	count = RTE_MIN(count, MAX_PKT_BURST);
+	count = RTE_MIN(count, free_entries);
+	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") about to dequeue %u buffers\n",
+			dev->device_fh, count);
+
+	/* Retrieve all of the head indexes first to avoid caching issues. */
+	for (i = 0; i < count; i++) {
+		desc_indexes[i] = vq->avail->ring[(vq->last_used_idx + i) &
+					(vq->size - 1)];
+	}
+
+	/* Prefetch descriptor index. */
+	rte_prefetch0(&vq->desc[desc_indexes[0]]);
+	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
+
+	for (i = 0; i < count; i++) {
+		int err;
+
+		if (likely(i + 1 < count)) {
+			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
+			rte_prefetch0(&vq->used->ring[(used_idx + 1) &
+						      (vq->size - 1)]);
+		}
+
+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(pkts[i] == NULL)) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			break;
+		}
+		err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
+					mbuf_pool);
+		if (unlikely(err)) {
+			rte_pktmbuf_free(pkts[i]);
+			break;
+		}
+
+		used_idx = vq->last_used_idx++ & (vq->size - 1);
+		vq->used->ring[used_idx].id  = desc_indexes[i];
+		vq->used->ring[used_idx].len = 0;
+		vhost_log_used_vring(dev, vq,
+				offsetof(struct vring_used, ring[used_idx]),
+				sizeof(vq->used->ring[used_idx]));
+	}
+
+	rte_smp_wmb();
+	rte_smp_rmb();
+	vq->used->idx += i;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			sizeof(vq->used->idx));
+
+	/* Kick guest if required. */
+	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+			&& (vq->callfd >= 0))
+		eventfd_write(vq->callfd, (eventfd_t)1);
+
+out:
+	if (unlikely(rarp_mbuf != NULL)) {
+		/*
+		 * Inject it to the head of "pkts" array, so that switch's mac
+		 * learning table will get updated first.
+		 */
+		memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *));
+		pkts[0] = rarp_mbuf;
+		i += 1;
+	}
+
+	return i;
+}
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
new file mode 100644
index 00000000..087aaed6
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -0,0 +1,289 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "fd_man.h"
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * If fd is -1, it means to search for a free entry.
+ * @return
+ *   index for the fd, or -1 if fd isn't in the fdset.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return -1;
+
+	for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++)
+		;
+
+	return i ==  MAX_FDS ? -1 : i;
+}
+
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+	return fdset_find_fd(pfdset, -1);
+}
+
+static void
+fdset_add_fd(struct fdset  *pfdset, int idx, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat)
+{
+	struct fdentry *pfdentry;
+
+	if (pfdset == NULL || idx >= MAX_FDS)
+		return;
+
+	pfdentry = &pfdset->fd[idx];
+	pfdentry->fd = fd;
+	pfdentry->rcb = rcb;
+	pfdentry->wcb = wcb;
+	pfdentry->dat = dat;
+}
+
+/**
+ * Fill the read/write fd_set with the fds in the fdset.
+ * @return
+ *  the maximum fds filled in the read/write fd_set.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+	struct fdentry *pfdentry;
+	int i, maxfds = -1;
+	int num = MAX_FDS;
+
+	if (pfdset == NULL)
+		return -1;
+
+	for (i = 0; i < num; i++) {
+		pfdentry = &pfdset->fd[i];
+		if (pfdentry->fd != -1) {
+			int added = 0;
+			if (pfdentry->rcb && rfset) {
+				FD_SET(pfdentry->fd, rfset);
+				added = 1;
+			}
+			if (pfdentry->wcb && wfset) {
+				FD_SET(pfdentry->fd, wfset);
+				added = 1;
+			}
+			if (added)
+				maxfds = pfdentry->fd < maxfds ?
+					maxfds : pfdentry->fd;
+		}
+	}
+	return maxfds;
+}
+
+void
+fdset_init(struct fdset *pfdset)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return;
+
+	for (i = 0; i < MAX_FDS; i++)
+		pfdset->fd[i].fd = -1;
+	pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+	int i;
+
+	if (pfdset == NULL || fd == -1)
+		return -1;
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+
+	/* Find a free slot in the list. */
+	i = fdset_find_free_slot(pfdset);
+	if (i == -1) {
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+		return -2;
+	}
+
+	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+	pfdset->num++;
+
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+
+	return 0;
+}
+
+/**
+ *  Unregister the fd from the fdset.
+ */
+void
+fdset_del(struct fdset *pfdset, int fd)
+{
+	int i;
+
+	if (pfdset == NULL || fd == -1)
+		return;
+
+	do {
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
+		i = fdset_find_fd(pfdset, fd);
+		if (i != -1 && pfdset->fd[i].busy == 0) {
+			/* busy indicates r/wcb is executing! */
+			pfdset->fd[i].fd = -1;
+			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+			pfdset->num--;
+			i = -1;
+		}
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+	} while (i != -1);
+}
+
+/**
+ *  Unregister the fd at the specified slot from the fdset.
+ */
+static void
+fdset_del_slot(struct fdset *pfdset, int index)
+{
+	if (pfdset == NULL || index < 0 || index >= MAX_FDS)
+		return;
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+
+	pfdset->fd[index].fd = -1;
+	pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL;
+	pfdset->num--;
+
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
+ * This functions runs in infinite blocking loop until there is no fd in
+ * pfdset. It calls corresponding r/w handler if there is event on the fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
+ */
+void
+fdset_event_dispatch(struct fdset *pfdset)
+{
+	fd_set rfds, wfds;
+	int i, maxfds;
+	struct fdentry *pfdentry;
+	int num = MAX_FDS;
+	fd_cb rcb, wcb;
+	void *dat;
+	int fd;
+	int remove1, remove2;
+	int ret;
+
+	if (pfdset == NULL)
+		return;
+
+	while (1) {
+		struct timeval tv;
+		tv.tv_sec = 1;
+		tv.tv_usec = 0;
+		FD_ZERO(&rfds);
+		FD_ZERO(&wfds);
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
+		maxfds = fdset_fill(&rfds, &wfds, pfdset);
+
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+
+		/*
+		 * When select is blocked, other threads might unregister
+		 * listenfds from and register new listenfds into fdset.
+		 * When select returns, the entries for listenfds in the fdset
+		 * might have been updated. It is ok if there is unwanted call
+		 * for new listenfds.
+		 */
+		ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv);
+		if (ret <= 0)
+			continue;
+
+		for (i = 0; i < num; i++) {
+			remove1 = remove2 = 0;
+			pthread_mutex_lock(&pfdset->fd_mutex);
+			pfdentry = &pfdset->fd[i];
+			fd = pfdentry->fd;
+			rcb = pfdentry->rcb;
+			wcb = pfdentry->wcb;
+			dat = pfdentry->dat;
+			pfdentry->busy = 1;
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb)
+				rcb(fd, dat, &remove1);
+			if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb)
+				wcb(fd, dat, &remove2);
+			pfdentry->busy = 0;
+			/*
+			 * fdset_del needs to check busy flag.
+			 * We don't allow fdset_del to be called in callback
+			 * directly.
+			 */
+			/*
+			 * When we are to clean up the fd from fdset,
+			 * because the fd is closed in the cb,
+			 * the old fd val could be reused by when creates new
+			 * listen fd in another thread, we couldn't call
+			 * fd_set_del.
+			 */
+			if (remove1 || remove2)
+				fdset_del_slot(pfdset, i);
+		}
+	}
+}
diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h
new file mode 100644
index 00000000..74ecde2c
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.h
@@ -0,0 +1,67 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FD_MAN_H_
+#define _FD_MAN_H_
+#include <stdint.h>
+#include <pthread.h>
+
+#define MAX_FDS 1024
+
+typedef void (*fd_cb)(int fd, void *dat, int *remove);
+
+struct fdentry {
+	int fd;		/* -1 indicates this entry is empty */
+	fd_cb rcb;	/* callback when this fd is readable. */
+	fd_cb wcb;	/* callback when this fd is writeable.*/
+	void *dat;	/* fd context */
+	int busy;	/* whether this entry is being used in cb. */
+};
+
+struct fdset {
+	struct fdentry fd[MAX_FDS];
+	pthread_mutex_t fd_mutex;
+	int num;	/* current fd number of this fdset */
+};
+
+
+void fdset_init(struct fdset *pfdset);
+
+int fdset_add(struct fdset *pfdset, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat);
+
+void fdset_del(struct fdset *pfdset, int fd);
+
+void fdset_event_dispatch(struct fdset *pfdset);
+
+#endif
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
new file mode 100644
index 00000000..df2bd648
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -0,0 +1,531 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+#define MAX_VIRTIO_BACKLOG 128
+
+static void vserver_new_vq_conn(int fd, void *data, int *remove);
+static void vserver_message_handler(int fd, void *dat, int *remove);
+
+struct connfd_ctx {
+	struct vhost_server *vserver;
+	uint32_t fh;
+};
+
+#define MAX_VHOST_SERVER 1024
+struct _vhost_server {
+	struct vhost_server *server[MAX_VHOST_SERVER];
+	struct fdset fdset;
+	int vserver_cnt;
+	pthread_mutex_t server_mutex;
+};
+
+static struct _vhost_server g_vhost_server = {
+	.fdset = {
+		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
+		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.num = 0
+	},
+	.vserver_cnt = 0,
+	.server_mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+	[VHOST_USER_NONE] = "VHOST_USER_NONE",
+	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
+	[VHOST_USER_GET_PROTOCOL_FEATURES]  = "VHOST_USER_GET_PROTOCOL_FEATURES",
+	[VHOST_USER_SET_PROTOCOL_FEATURES]  = "VHOST_USER_SET_PROTOCOL_FEATURES",
+	[VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
+	[VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
+	[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
+};
+
+/**
+ * Create a unix domain socket, bind to path and listen for connection.
+ * @return
+ *  socket fd or -1 on failure
+ */
+static int
+uds_socket(const char *path)
+{
+	struct sockaddr_un un;
+	int sockfd;
+	int ret;
+
+	if (path == NULL)
+		return -1;
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return -1;
+	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+	if (ret == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
+			sockfd, path);
+		goto err;
+	}
+	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+	ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
+	if (ret == -1)
+		goto err;
+
+	return sockfd;
+
+err:
+	close(sockfd);
+	return -1;
+}
+
+/* return bytes# of read on success or negative val on failure. */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = buf;
+	iov.iov_len  = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+	msgh.msg_controllen = sizeof(control);
+
+	ret = recvmsg(sockfd, &msgh, 0);
+	if (ret <= 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
+		return ret;
+	}
+
+	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
+		return -1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if ((cmsg->cmsg_level == SOL_SOCKET) &&
+			(cmsg->cmsg_type == SCM_RIGHTS)) {
+			memcpy(fds, CMSG_DATA(cmsg), fdsize);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/* return bytes# of read on success or negative val on failure. */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+	if (ret <= 0)
+		return ret;
+
+	if (msg && msg->size) {
+		if (msg->size > sizeof(msg->payload)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"invalid msg size: %d\n", msg->size);
+			return -1;
+		}
+		ret = read(sockfd, &msg->payload, msg->size);
+		if (ret <= 0)
+			return ret;
+		if (ret != (int)msg->size) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"read control message failed\n");
+			return -1;
+		}
+	}
+
+	return ret;
+}
+
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (fds && fd_num > 0) {
+		msgh.msg_control = control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), fds, fdsize);
+	} else {
+		msgh.msg_control = NULL;
+		msgh.msg_controllen = 0;
+	}
+
+	do {
+		ret = sendmsg(sockfd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	if (!msg)
+		return 0;
+
+	msg->flags &= ~VHOST_USER_VERSION_MASK;
+	msg->flags |= VHOST_USER_VERSION;
+	msg->flags |= VHOST_USER_REPLY_MASK;
+
+	ret = send_fd_message(sockfd, (char *)msg,
+		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+
+	return ret;
+}
+
+/* call back when there is new virtio connection.  */
+static void
+vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+{
+	struct vhost_server *vserver = (struct vhost_server *)dat;
+	int conn_fd;
+	struct connfd_ctx *ctx;
+	int fh;
+	struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 };
+	unsigned int size;
+
+	conn_fd = accept(fd, NULL, NULL);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"new virtio connection is %d\n", conn_fd);
+	if (conn_fd < 0)
+		return;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL) {
+		close(conn_fd);
+		return;
+	}
+
+	fh = vhost_new_device(vdev_ctx);
+	if (fh == -1) {
+		free(ctx);
+		close(conn_fd);
+		return;
+	}
+
+	vdev_ctx.fh = fh;
+	size = strnlen(vserver->path, PATH_MAX);
+	vhost_set_ifname(vdev_ctx, vserver->path,
+		size);
+
+	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+
+	ctx->vserver = vserver;
+	ctx->fh = fh;
+	fdset_add(&g_vhost_server.fdset,
+		conn_fd, vserver_message_handler, NULL, ctx);
+}
+
+/* callback when there is message on the connfd */
+static void
+vserver_message_handler(int connfd, void *dat, int *remove)
+{
+	struct vhost_device_ctx ctx;
+	struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+	struct VhostUserMsg msg;
+	uint64_t features;
+	int ret;
+
+	ctx.fh = cfd_ctx->fh;
+	ret = read_vhost_message(connfd, &msg);
+	if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+		if (ret < 0)
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read message failed\n");
+		else if (ret == 0)
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"vhost peer closed\n");
+		else
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read incorrect message\n");
+
+		close(connfd);
+		*remove = 1;
+		free(cfd_ctx);
+		vhost_destroy_device(ctx);
+
+		return;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+		vhost_message_str[msg.request]);
+	switch (msg.request) {
+	case VHOST_USER_GET_FEATURES:
+		ret = vhost_get_features(ctx, &features);
+		msg.payload.u64 = features;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		features = msg.payload.u64;
+		vhost_set_features(ctx, &features);
+		break;
+
+	case VHOST_USER_GET_PROTOCOL_FEATURES:
+		msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
+		user_set_protocol_features(ctx, msg.payload.u64);
+		break;
+
+	case VHOST_USER_SET_OWNER:
+		vhost_set_owner(ctx);
+		break;
+	case VHOST_USER_RESET_OWNER:
+		vhost_reset_owner(ctx);
+		break;
+
+	case VHOST_USER_SET_MEM_TABLE:
+		user_set_mem_table(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_LOG_BASE:
+		user_set_log_base(ctx, &msg);
+
+		/* it needs a reply */
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_LOG_FD:
+		close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+		break;
+
+	case VHOST_USER_SET_VRING_NUM:
+		vhost_set_vring_num(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		vhost_set_vring_addr(ctx, &msg.payload.addr);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		vhost_set_vring_base(ctx, &msg.payload.state);
+		break;
+
+	case VHOST_USER_GET_VRING_BASE:
+		ret = user_get_vring_base(ctx, &msg.payload.state);
+		msg.size = sizeof(msg.payload.state);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_KICK:
+		user_set_vring_kick(ctx, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		user_set_vring_call(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ERR:
+		if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+			close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+		break;
+
+	case VHOST_USER_GET_QUEUE_NUM:
+		msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ENABLE:
+		user_set_vring_enable(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SEND_RARP:
+		user_send_rarp(ctx, &msg);
+		break;
+
+	default:
+		break;
+
+	}
+}
+
+/**
+ * Creates and initialise the vhost server.
+ */
+int
+rte_vhost_driver_register(const char *path)
+{
+	struct vhost_server *vserver;
+
+	pthread_mutex_lock(&g_vhost_server.server_mutex);
+
+	if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"error: the number of servers reaches maximum\n");
+		pthread_mutex_unlock(&g_vhost_server.server_mutex);
+		return -1;
+	}
+
+	vserver = calloc(sizeof(struct vhost_server), 1);
+	if (vserver == NULL) {
+		pthread_mutex_unlock(&g_vhost_server.server_mutex);
+		return -1;
+	}
+
+	vserver->listenfd = uds_socket(path);
+	if (vserver->listenfd < 0) {
+		free(vserver);
+		pthread_mutex_unlock(&g_vhost_server.server_mutex);
+		return -1;
+	}
+
+	vserver->path = strdup(path);
+
+	fdset_add(&g_vhost_server.fdset, vserver->listenfd,
+		vserver_new_vq_conn, NULL, vserver);
+
+	g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
+	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+
+	return 0;
+}
+
+
+/**
+ * Unregister the specified vhost server
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+	int i;
+	int count;
+
+	pthread_mutex_lock(&g_vhost_server.server_mutex);
+
+	for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
+		if (!strcmp(g_vhost_server.server[i]->path, path)) {
+			fdset_del(&g_vhost_server.fdset,
+				g_vhost_server.server[i]->listenfd);
+
+			close(g_vhost_server.server[i]->listenfd);
+			free(g_vhost_server.server[i]->path);
+			free(g_vhost_server.server[i]);
+
+			unlink(path);
+
+			count = --g_vhost_server.vserver_cnt;
+			g_vhost_server.server[i] = g_vhost_server.server[count];
+			g_vhost_server.server[count] = NULL;
+			pthread_mutex_unlock(&g_vhost_server.server_mutex);
+
+			return 0;
+		}
+	}
+	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+
+	return -1;
+}
+
+int
+rte_vhost_driver_session_start(void)
+{
+	fdset_event_dispatch(&g_vhost_server.fdset);
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
new file mode 100644
index 00000000..e3bb4138
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -0,0 +1,117 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_virtio_net.h"
+#include "fd_man.h"
+
+struct vhost_server {
+	char *path; /**< The path the uds is bind to. */
+	int listenfd;     /**< The listener sockfd. */
+};
+
+/* refer to hw/virtio/vhost-user.c */
+
+typedef enum VhostUserRequest {
+	VHOST_USER_NONE = 0,
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+	uint64_t guest_phys_addr;
+	uint64_t memory_size;
+	uint64_t userspace_addr;
+	uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+	uint32_t nregions;
+	uint32_t padding;
+	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+} VhostUserLog;
+
+typedef struct VhostUserMsg {
+	VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     0x3
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+	uint32_t flags;
+	uint32_t size; /* the following payload size */
+	union {
+#define VHOST_USER_VRING_IDX_MASK   0xff
+#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
+		uint64_t u64;
+		struct vhost_vring_state state;
+		struct vhost_vring_addr addr;
+		VhostUserMemory memory;
+		VhostUserLog    log;
+	} payload;
+	int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    0x1
+
+/*****************************************************************************/
+#endif
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
new file mode 100644
index 00000000..f5248bc4
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -0,0 +1,446 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "virtio-net.h"
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+struct orig_region_map {
+	int fd;
+	uint64_t mapped_address;
+	uint64_t mapped_size;
+	uint64_t blksz;
+};
+
+#define orig_region(ptr, nregions) \
+	((struct orig_region_map *)RTE_PTR_ADD((ptr), \
+		sizeof(struct virtio_memory) + \
+		sizeof(struct virtio_memory_regions) * (nregions)))
+
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat stat;
+
+	fstat(fd, &stat);
+	return (uint64_t)stat.st_blksize;
+}
+
+static void
+free_mem_region(struct virtio_net *dev)
+{
+	struct orig_region_map *region;
+	unsigned int idx;
+
+	if (!dev || !dev->mem)
+		return;
+
+	region = orig_region(dev->mem, dev->mem->nregions);
+	for (idx = 0; idx < dev->mem->nregions; idx++) {
+		if (region[idx].mapped_address) {
+			munmap((void *)(uintptr_t)region[idx].mapped_address,
+					region[idx].mapped_size);
+			close(region[idx].fd);
+		}
+	}
+}
+
+void
+vhost_backend_cleanup(struct virtio_net *dev)
+{
+	if (dev->mem) {
+		free_mem_region(dev);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
+}
+
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions *pregion;
+	uint64_t mapped_address, mapped_size;
+	struct virtio_net *dev;
+	unsigned int idx = 0;
+	struct orig_region_map *pregion_orig;
+	uint64_t alignment;
+
+	/* unmap old memory regions one by one*/
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* Remove from the data plane. */
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	if (dev->mem) {
+		free_mem_region(dev);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	dev->mem = calloc(1,
+		sizeof(struct virtio_memory) +
+		sizeof(struct virtio_memory_regions) * memory.nregions +
+		sizeof(struct orig_region_map) * memory.nregions);
+	if (dev->mem == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
+			dev->device_fh);
+		return -1;
+	}
+	dev->mem->nregions = memory.nregions;
+
+	pregion_orig = orig_region(dev->mem, memory.nregions);
+	for (idx = 0; idx < memory.nregions; idx++) {
+		pregion = &dev->mem->regions[idx];
+		pregion->guest_phys_address =
+			memory.regions[idx].guest_phys_addr;
+		pregion->guest_phys_address_end =
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		pregion->memory_size =
+			memory.regions[idx].memory_size;
+		pregion->userspace_address =
+			memory.regions[idx].userspace_addr;
+
+		/* This is ugly */
+		mapped_size = memory.regions[idx].memory_size +
+			memory.regions[idx].mmap_offset;
+
+		/* mmap() without flag of MAP_ANONYMOUS, should be called
+		 * with length argument aligned with hugepagesz at older
+		 * longterm version Linux, like 2.6.32 and 3.2.72, or
+		 * mmap() will fail with EINVAL.
+		 *
+		 * to avoid failure, make sure in caller to keep length
+		 * aligned.
+		 */
+		alignment = get_blk_size(pmsg->fds[idx]);
+		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
+
+		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+			mapped_size,
+			PROT_READ | PROT_WRITE, MAP_SHARED,
+			pmsg->fds[idx],
+			0);
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
+			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
+			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
+			mapped_size, memory.regions[idx].mmap_offset,
+			alignment);
+
+		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap qemu guest failed.\n");
+			goto err_mmap;
+		}
+
+		pregion_orig[idx].mapped_address = mapped_address;
+		pregion_orig[idx].mapped_size = mapped_size;
+		pregion_orig[idx].blksz = alignment;
+		pregion_orig[idx].fd = pmsg->fds[idx];
+
+		mapped_address +=  memory.regions[idx].mmap_offset;
+
+		pregion->address_offset = mapped_address -
+			pregion->guest_phys_address;
+
+		if (memory.regions[idx].guest_phys_addr == 0) {
+			dev->mem->base_address =
+				memory.regions[idx].userspace_addr;
+			dev->mem->mapped_address =
+				pregion->address_offset;
+		}
+
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)pregion->guest_phys_address,
+			(void *)(uintptr_t)pregion->userspace_address,
+			 pregion->memory_size);
+	}
+
+	return 0;
+
+err_mmap:
+	while (idx--) {
+		munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
+				pregion_orig[idx].mapped_size);
+		close(pregion_orig[idx].fd);
+	}
+	free(dev->mem);
+	dev->mem = NULL;
+	return -1;
+}
+
+static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+	return vq && vq->desc   &&
+	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
+	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
+}
+
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+	struct vhost_virtqueue *rvq, *tvq;
+	uint32_t i;
+
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+		tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
+
+		if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"virtio is not ready for processing.\n");
+			return 0;
+		}
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"virtio is now ready for processing.\n");
+	return 1;
+}
+
+void
+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = VIRTIO_INVALID_EVENTFD;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%d file:%d\n", file.index, file.fd);
+	vhost_set_vring_call(ctx, &file);
+}
+
+
+/*
+ *  In vhost-user, when we receive kick message, will test whether virtio
+ *  device is ready for packet processing.
+ */
+void
+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+	struct virtio_net *dev = get_device(ctx);
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = VIRTIO_INVALID_EVENTFD;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%d file:%d\n", file.index, file.fd);
+	vhost_set_vring_kick(ctx, &file);
+
+	if (virtio_is_ready(dev) &&
+		!(dev->flags & VIRTIO_DEV_RUNNING))
+			notify_ops->new_device(dev);
+}
+
+/*
+ * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
+ */
+int
+user_get_vring_base(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev = get_device(ctx);
+
+	if (dev == NULL)
+		return -1;
+	/* We have to stop the queue (virtio) if it is running. */
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	/* Here we are safe to get the last used index */
+	vhost_get_vring_base(ctx, state->index, state);
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring base idx:%d file:%d\n", state->index, state->num);
+	/*
+	 * Based on current qemu vhost-user implementation, this message is
+	 * sent and only sent in vhost_vring_stop.
+	 * TODO: cleanup the vring, it isn't usable since here.
+	 */
+	if (dev->virtqueue[state->index]->kickfd >= 0)
+		close(dev->virtqueue[state->index]->kickfd);
+
+	dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+	return 0;
+}
+
+/*
+ * when virtio queues are ready to work, qemu will send us to
+ * enable the virtio queue pair.
+ */
+int
+user_set_vring_enable(struct vhost_device_ctx ctx,
+		      struct vhost_vring_state *state)
+{
+	struct virtio_net *dev = get_device(ctx);
+	int enable = (int)state->num;
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set queue enable: %d to qp idx: %d\n",
+		enable, state->index);
+
+	if (notify_ops->vring_state_changed) {
+		notify_ops->vring_state_changed(dev, state->index, enable);
+	}
+
+	dev->virtqueue[state->index]->enabled = enable;
+
+	return 0;
+}
+
+void
+user_set_protocol_features(struct vhost_device_ctx ctx,
+			   uint64_t protocol_features)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
+		return;
+
+	dev->protocol_features = protocol_features;
+}
+
+int
+user_set_log_base(struct vhost_device_ctx ctx,
+		 struct VhostUserMsg *msg)
+{
+	struct virtio_net *dev;
+	int fd = msg->fds[0];
+	uint64_t size, off;
+	void *addr;
+
+	dev = get_device(ctx);
+	if (!dev)
+		return -1;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
+		return -1;
+	}
+
+	if (msg->size != sizeof(VhostUserLog)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid log base msg size: %"PRId32" != %d\n",
+			msg->size, (int)sizeof(VhostUserLog));
+		return -1;
+	}
+
+	size = msg->payload.log.mmap_size;
+	off  = msg->payload.log.mmap_offset;
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"log mmap size: %"PRId64", offset: %"PRId64"\n",
+		size, off);
+
+	/*
+	 * mmap from 0 to workaround a hugepage mmap bug: mmap will
+	 * fail when offset is not page size aligned.
+	 */
+	addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
+		return -1;
+	}
+
+	/* TODO: unmap on stop */
+	dev->log_base = (uint64_t)(uintptr_t)addr + off;
+	dev->log_size = size;
+
+	return 0;
+}
+
+/*
+ * An rarp packet is constructed and broadcasted to notify switches about
+ * the new location of the migrated VM, so that packets from outside will
+ * not be lost after migration.
+ *
+ * However, we don't actually "send" a rarp packet here, instead, we set
+ * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
+ */
+int
+user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *msg)
+{
+	struct virtio_net *dev;
+	uint8_t *mac = (uint8_t *)&msg->payload.u64;
+
+	dev = get_device(ctx);
+	if (!dev)
+		return -1;
+
+	RTE_LOG(DEBUG, VHOST_CONFIG,
+		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+	memcpy(dev->mac.addr_bytes, mac, 6);
+
+	/*
+	 * Set the flag to inject a RARP broadcast packet at
+	 * rte_vhost_dequeue_burst().
+	 *
+	 * rte_smp_wmb() is for making sure the mac is copied
+	 * before the flag is set.
+	 */
+	rte_smp_wmb();
+	rte_atomic16_set(&dev->broadcast_rarp, 1);
+
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
new file mode 100644
index 00000000..cefec162
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -0,0 +1,64 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+#define VHOST_USER_PROTOCOL_F_MQ	0
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD	1
+#define VHOST_USER_PROTOCOL_F_RARP	2
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+					 (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+					 (1ULL << VHOST_USER_PROTOCOL_F_RARP))
+
+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_protocol_features(struct vhost_device_ctx ctx,
+				uint64_t protocol_features);
+int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *);
+int user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *);
+
+int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+
+int user_set_vring_enable(struct vhost_device_ctx ctx,
+			  struct vhost_vring_state *state);
+
+#endif
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
new file mode 100644
index 00000000..d870ad97
--- /dev/null
+++ b/lib/librte_vhost/virtio-net.c
@@ -0,0 +1,772 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <sys/socket.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+
+#include "vhost-net.h"
+#include "virtio-net.h"
+
+#define MAX_VHOST_DEVICE	1024
+static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+
+/* device ops to add/remove device to/from data core. */
+struct virtio_net_device_ops const *notify_ops;
+
+#define VHOST_USER_F_PROTOCOL_FEATURES	30
+
+/* Features supported by this lib. */
+#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
+				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
+				(1ULL << VIRTIO_NET_F_CTRL_RX) | \
+				(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
+				(VHOST_SUPPORTS_MQ)            | \
+				(1ULL << VIRTIO_F_VERSION_1)   | \
+				(1ULL << VHOST_F_LOG_ALL)      | \
+				(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+				(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
+				(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
+				(1ULL << VIRTIO_NET_F_CSUM)    | \
+				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
+				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+				(1ULL << VIRTIO_NET_F_GUEST_TSO6))
+
+static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
+
+
+/*
+ * Converts QEMU virtual address to Vhost virtual address. This function is
+ * used to convert the ring addresses to our address space.
+ */
+static uint64_t
+qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
+{
+	struct virtio_memory_regions *region;
+	uint64_t vhost_va = 0;
+	uint32_t regionidx = 0;
+
+	/* Find the region where the address lives. */
+	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+		region = &dev->mem->regions[regionidx];
+		if ((qemu_va >= region->userspace_address) &&
+			(qemu_va <= region->userspace_address +
+			region->memory_size)) {
+			vhost_va = qemu_va + region->guest_phys_address +
+				region->address_offset -
+				region->userspace_address;
+			break;
+		}
+	}
+	return vhost_va;
+}
+
+
+struct virtio_net *
+get_device(struct vhost_device_ctx ctx)
+{
+	struct virtio_net *dev = vhost_devices[ctx.fh];
+
+	if (unlikely(!dev)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") device not found.\n", ctx.fh);
+	}
+
+	return dev;
+}
+
+static void
+cleanup_vq(struct vhost_virtqueue *vq, int destroy)
+{
+	if ((vq->callfd >= 0) && (destroy != 0))
+		close(vq->callfd);
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+}
+
+/*
+ * Unmap any memory, close any file descriptors and
+ * free any memory owned by a device.
+ */
+static void
+cleanup_device(struct virtio_net *dev, int destroy)
+{
+	uint32_t i;
+
+	vhost_backend_cleanup(dev);
+
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy);
+		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy);
+	}
+}
+
+/*
+ * Release virtqueues and device memory.
+ */
+static void
+free_device(struct virtio_net *dev)
+{
+	uint32_t i;
+
+	for (i = 0; i < dev->virt_qp_nb; i++)
+		rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
+
+	rte_free(dev);
+}
+
+static void
+init_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
+{
+	memset(vq, 0, sizeof(struct vhost_virtqueue));
+
+	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+	/* Backends are set to -1 indicating an inactive device. */
+	vq->backend = -1;
+
+	/* always set the default vq pair to enabled */
+	if (qp_idx == 0)
+		vq->enabled = 1;
+}
+
+static void
+init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	uint32_t base_idx = qp_idx * VIRTIO_QNUM;
+
+	init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx);
+	init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx);
+}
+
+static void
+reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
+{
+	int callfd;
+
+	callfd = vq->callfd;
+	init_vring_queue(vq, qp_idx);
+	vq->callfd = callfd;
+}
+
+static void
+reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	uint32_t base_idx = qp_idx * VIRTIO_QNUM;
+
+	reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx);
+	reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx);
+}
+
+static int
+alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue *virtqueue = NULL;
+	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
+	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
+
+	virtqueue = rte_malloc(NULL,
+			       sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
+	if (virtqueue == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
+		return -1;
+	}
+
+	dev->virtqueue[virt_rx_q_idx] = virtqueue;
+	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
+
+	init_vring_queue_pair(dev, qp_idx);
+
+	dev->virt_qp_nb += 1;
+
+	return 0;
+}
+
+/*
+ * Reset some variables in device structure, while keeping few
+ * others untouched, such as device_fh, ifname, virt_qp_nb: they
+ * should be same unless the device is removed.
+ */
+static void
+reset_device(struct virtio_net *dev)
+{
+	uint32_t i;
+
+	dev->features = 0;
+	dev->protocol_features = 0;
+	dev->flags = 0;
+
+	for (i = 0; i < dev->virt_qp_nb; i++)
+		reset_vring_queue_pair(dev, i);
+}
+
+/*
+ * Function is called from the CUSE open function. The device structure is
+ * initialised and a new entry is added to the device configuration linked
+ * list.
+ */
+int
+vhost_new_device(struct vhost_device_ctx ctx)
+{
+	struct virtio_net *dev;
+	int i;
+
+	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
+	if (dev == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to allocate memory for dev.\n",
+			ctx.fh);
+		return -1;
+	}
+
+	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
+		if (vhost_devices[i] == NULL)
+			break;
+	}
+	if (i == MAX_VHOST_DEVICE) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find a free slot for new device.\n");
+		return -1;
+	}
+
+	vhost_devices[i] = dev;
+	dev->device_fh   = i;
+
+	return i;
+}
+
+/*
+ * Function is called from the CUSE release function. This function will
+ * cleanup the device and remove it from device configuration linked list.
+ */
+void
+vhost_destroy_device(struct vhost_device_ctx ctx)
+{
+	struct virtio_net *dev = get_device(ctx);
+
+	if (dev == NULL)
+		return;
+
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	cleanup_device(dev, 1);
+	free_device(dev);
+
+	vhost_devices[ctx.fh] = NULL;
+}
+
+void
+vhost_set_ifname(struct vhost_device_ctx ctx,
+	const char *if_name, unsigned int if_len)
+{
+	struct virtio_net *dev;
+	unsigned int len;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return;
+
+	len = if_len > sizeof(dev->ifname) ?
+		sizeof(dev->ifname) : if_len;
+
+	strncpy(dev->ifname, if_name, len);
+}
+
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_OWNER
+ * This function just returns success at the moment unless
+ * the device hasn't been initialised.
+ */
+int
+vhost_set_owner(struct vhost_device_ctx ctx)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_RESET_OWNER
+ */
+int
+vhost_reset_owner(struct vhost_device_ctx ctx)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	cleanup_device(dev, 0);
+	reset_device(dev);
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_GET_FEATURES
+ * The features that we support are requested.
+ */
+int
+vhost_get_features(struct vhost_device_ctx ctx, uint64_t *pu)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* Send our supported features. */
+	*pu = VHOST_FEATURES;
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_FEATURES
+ * We receive the negotiated features supported by us and the virtio device.
+ */
+int
+vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu)
+{
+	struct virtio_net *dev;
+	uint16_t vhost_hlen;
+	uint16_t i;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+	if (*pu & ~VHOST_FEATURES)
+		return -1;
+
+	dev->features = *pu;
+	if (dev->features &
+		((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) {
+		vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	} else {
+		vhost_hlen = sizeof(struct virtio_net_hdr);
+	}
+	LOG_DEBUG(VHOST_CONFIG,
+		"(%"PRIu64") Mergeable RX buffers %s, virtio 1 %s\n",
+		dev->device_fh,
+		(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off",
+		(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off");
+
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		uint16_t base_idx = i * VIRTIO_QNUM;
+
+		dev->virtqueue[base_idx + VIRTIO_RXQ]->vhost_hlen = vhost_hlen;
+		dev->virtqueue[base_idx + VIRTIO_TXQ]->vhost_hlen = vhost_hlen;
+	}
+
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
+ * The virtio device sends us the size of the descriptor ring.
+ */
+int
+vhost_set_vring_num(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
+	dev->virtqueue[state->index]->size = state->num;
+
+	return 0;
+}
+
+/*
+ * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the
+ * same numa node as the memory of vring descriptor.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+	int oldnode, newnode;
+	struct virtio_net *old_dev;
+	struct vhost_virtqueue *old_vq, *vq;
+	int ret;
+
+	/*
+	 * vq is allocated on pairs, we should try to do realloc
+	 * on first queue of one queue pair only.
+	 */
+	if (index % VIRTIO_QNUM != 0)
+		return dev;
+
+	old_dev = dev;
+	vq = old_vq = dev->virtqueue[index];
+
+	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+			    MPOL_F_NODE | MPOL_F_ADDR);
+
+	/* check if we need to reallocate vq */
+	ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
+			     MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get vq numa information.\n");
+		return dev;
+	}
+	if (oldnode != newnode) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"reallocate vq from %d to %d node\n", oldnode, newnode);
+		vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0,
+				       newnode);
+		if (!vq)
+			return dev;
+
+		memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM);
+		rte_free(old_vq);
+	}
+
+	/* check if we need to reallocate dev */
+	ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
+			    MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get dev numa information.\n");
+		goto out;
+	}
+	if (oldnode != newnode) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"reallocate dev from %d to %d node\n",
+			oldnode, newnode);
+		dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
+		if (!dev) {
+			dev = old_dev;
+			goto out;
+		}
+
+		memcpy(dev, old_dev, sizeof(*dev));
+		rte_free(old_dev);
+	}
+
+out:
+	dev->virtqueue[index] = vq;
+	dev->virtqueue[index + 1] = vq + 1;
+	vhost_devices[dev->device_fh] = dev;
+
+	return dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+	return dev;
+}
+#endif
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
+ * The virtio device sends us the desc, used and avail ring addresses.
+ * This function then converts these to our address space.
+ */
+int
+vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(ctx);
+	if ((dev == NULL) || (dev->mem == NULL))
+		return -1;
+
+	/* addr->index refers to the queue index. The txq 1, rxq is 0. */
+	vq = dev->virtqueue[addr->index];
+
+	/* The addresses are converted from QEMU virtual to Vhost virtual. */
+	vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
+			addr->desc_user_addr);
+	if (vq->desc == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to find desc ring address.\n",
+			dev->device_fh);
+		return -1;
+	}
+
+	dev = numa_realloc(dev, addr->index);
+	vq = dev->virtqueue[addr->index];
+
+	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
+			addr->avail_user_addr);
+	if (vq->avail == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to find avail ring address.\n",
+			dev->device_fh);
+		return -1;
+	}
+
+	vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
+			addr->used_user_addr);
+	if (vq->used == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to find used ring address.\n",
+			dev->device_fh);
+		return -1;
+	}
+
+	vq->log_guest_addr = addr->log_guest_addr;
+
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address desc: %p\n",
+			dev->device_fh, vq->desc);
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address avail: %p\n",
+			dev->device_fh, vq->avail);
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address used: %p\n",
+			dev->device_fh, vq->used);
+	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") log_guest_addr: %"PRIx64"\n",
+			dev->device_fh, vq->log_guest_addr);
+
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_VRING_BASE
+ * The virtio device sends us the available ring last used index.
+ */
+int
+vhost_set_vring_base(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
+	dev->virtqueue[state->index]->last_used_idx = state->num;
+	dev->virtqueue[state->index]->last_used_idx_res = state->num;
+
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_GET_VRING_BASE
+ * We send the virtio device our available ring last used index.
+ */
+int
+vhost_get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	state->index = index;
+	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
+	state->num = dev->virtqueue[state->index]->last_used_idx;
+
+	return 0;
+}
+
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_VRING_CALL
+ * The virtio device sends an eventfd to interrupt the guest. This fd gets
+ * copied into our process space.
+ */
+int
+vhost_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/*
+	 * FIXME: VHOST_SET_VRING_CALL is the first per-vring message
+	 * we get, so we do vring queue pair allocation here.
+	 */
+	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
+		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
+			return -1;
+	}
+
+	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
+	vq = dev->virtqueue[file->index];
+	assert(vq != NULL);
+
+	if (vq->callfd >= 0)
+		close(vq->callfd);
+
+	vq->callfd = file->fd;
+
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_SET_VRING_KICK
+ * The virtio device sends an eventfd that it can use to notify us.
+ * This fd gets copied into our process space.
+ */
+int
+vhost_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
+	vq = dev->virtqueue[file->index];
+
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+
+	vq->kickfd = file->fd;
+
+	return 0;
+}
+
+/*
+ * Called from CUSE IOCTL: VHOST_NET_SET_BACKEND
+ * To complete device initialisation when the virtio driver is loaded,
+ * we are provided with a valid fd for a tap device (not used by us).
+ * If this happens then we can add the device to a data core.
+ * When the virtio driver is removed we get fd=-1.
+ * At that point we remove the device from the data core.
+ * The device will still exist in the device configuration linked list.
+ */
+int
+vhost_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
+	dev->virtqueue[file->index]->backend = file->fd;
+
+	/*
+	 * If the device isn't already running and both backend fds are set,
+	 * we add the device.
+	 */
+	if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
+		if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
+			((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED)) {
+			return notify_ops->new_device(dev);
+		}
+	/* Otherwise we remove it. */
+	} else
+		if (file->fd == VIRTIO_DEV_STOPPED)
+			notify_ops->destroy_device(dev);
+	return 0;
+}
+
+int rte_vhost_enable_guest_notification(struct virtio_net *dev,
+	uint16_t queue_id, int enable)
+{
+	if (enable) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"guest notification isn't supported.\n");
+		return -1;
+	}
+
+	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
+	return 0;
+}
+
+uint64_t rte_vhost_feature_get(void)
+{
+	return VHOST_FEATURES;
+}
+
+int rte_vhost_feature_disable(uint64_t feature_mask)
+{
+	VHOST_FEATURES = VHOST_FEATURES & ~feature_mask;
+	return 0;
+}
+
+int rte_vhost_feature_enable(uint64_t feature_mask)
+{
+	if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) {
+		VHOST_FEATURES = VHOST_FEATURES | feature_mask;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Register ops so that we can add/remove device to data core.
+ */
+int
+rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
+{
+	notify_ops = ops;
+
+	return 0;
+}
diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
new file mode 100644
index 00000000..75fb57e5
--- /dev/null
+++ b/lib/librte_vhost/virtio-net.h
@@ -0,0 +1,43 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_H
+#define _VIRTIO_NET_H
+
+#include "vhost-net.h"
+#include "rte_virtio_net.h"
+
+struct virtio_net_device_ops const *notify_ops;
+struct virtio_net *get_device(struct vhost_device_ctx ctx);
+
+#endif
-- 
cgit 1.2.3-korg